瀏覽代碼

Merge pull request #976 from ThomasWaldmann/issue-974

fixes Issue 974
enkore 9 年之前
父節點
當前提交
96226b5f81
共有 11 個文件被更改，包括 63 次插入和 47 次删除
  1. +10 -11
      borg/archive.py
  2. +5 -5
      borg/archiver.py
  3. +8 -8
      borg/cache.py
  4. +9 -1
      borg/helpers.py
  5. +5 -6
      borg/key.py
  6. +5 -1
      borg/remote.py
  7. +7 -3
      borg/repository.py
  8. +2 -3
      borg/testsuite/archiver.py
  9. +6 -1
      borg/testsuite/helpers.py
  10. +2 -1
      borg/testsuite/key.py
  11. +4 -7
      borg/upgrader.py

+ 10 - 11
borg/archive.py

@@ -1,4 +1,3 @@
-from binascii import hexlify
 from datetime import datetime, timezone
 from getpass import getuser
 from itertools import groupby
@@ -19,8 +18,8 @@ from . import xattr
 from .compress import Compressor, COMPR_BUFFER
 from .constants import *  # NOQA
 from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \
-    parse_timestamp, to_localtime, format_time, format_timedelta, \
-    Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, \
+    parse_timestamp, to_localtime, format_time, format_timedelta, safe_encode, safe_decode, \
+    Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
     ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \
     PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume
 from .repository import Repository
@@ -176,7 +175,7 @@ class Archive:
         self.id = id
         self.metadata = self._load_meta(self.id)
         decode_dict(self.metadata, ARCHIVE_TEXT_KEYS)
-        self.metadata[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in self.metadata[b'cmdline']]
+        self.metadata[b'cmdline'] = [safe_decode(arg) for arg in self.metadata[b'cmdline']]
         self.name = self.metadata[b'name']
 
     @property
@@ -194,7 +193,7 @@ class Archive:
 
     @property
     def fpr(self):
-        return hexlify(self.id).decode('ascii')
+        return bin_to_hex(self.id)
 
     @property
     def duration(self):
@@ -567,7 +566,7 @@ Number of files: {0.stats.nfiles}'''.format(
                 return status
             else:
                 self.hard_links[st.st_ino, st.st_dev] = safe_path
-        path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape'))
+        path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
         first_run = not cache.files
         ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode)
         if first_run:
@@ -795,7 +794,7 @@ class ArchiveChecker:
             for chunk_id, size, csize in item[b'chunks']:
                 if chunk_id not in self.chunks:
                     # If a file chunk is missing, create an all empty replacement chunk
-                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
+                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(safe_decode(item[b'path']), offset, offset + size))
                     self.error_found = True
                     data = bytes(size)
                     chunk_id = self.key.id_hash(data)
@@ -823,7 +822,7 @@ class ArchiveChecker:
                 return _state
 
             def report(msg, chunk_id, chunk_no):
-                cid = hexlify(chunk_id).decode('ascii')
+                cid = bin_to_hex(chunk_id)
                 msg += ' [chunk: %06d_%s]' % (chunk_no, cid)  # see debug-dump-archive-items
                 self.error_found = True
                 logger.error(msg)
@@ -882,7 +881,7 @@ class ArchiveChecker:
                 if archive[b'version'] != 1:
                     raise Exception('Unknown archive metadata version')
                 decode_dict(archive, ARCHIVE_TEXT_KEYS)
-                archive[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in archive[b'cmdline']]
+                archive[b'cmdline'] = [safe_decode(arg) for arg in archive[b'cmdline']]
                 items_buffer = ChunkBuffer(self.key)
                 items_buffer.write_chunk = add_callback
                 for item in robust_iterator(archive):
@@ -1187,10 +1186,10 @@ class ArchiveRecreater:
         logger.info('Found %s, will resume interrupted operation', target_name)
         old_target = self.open_archive(target_name)
         resume_id = old_target.metadata[b'recreate_source_id']
-        resume_args = [arg.decode('utf-8', 'surrogateescape') for arg in old_target.metadata[b'recreate_args']]
+        resume_args = [safe_decode(arg) for arg in old_target.metadata[b'recreate_args']]
         if resume_id != archive.id:
             logger.warning('Source archive changed, will discard %s and start over', target_name)
-            logger.warning('Saved fingerprint:   %s', hexlify(resume_id).decode('ascii'))
+            logger.warning('Saved fingerprint:   %s', bin_to_hex(resume_id))
             logger.warning('Current fingerprint: %s', archive.fpr)
             old_target.delete(Statistics(), progress=self.progress)
             return None, None  # can't resume

+ 5 - 5
borg/archiver.py

@@ -1,4 +1,4 @@
-from binascii import hexlify, unhexlify
+from binascii import unhexlify
 from datetime import datetime
 from itertools import zip_longest
 from operator import attrgetter
@@ -19,7 +19,7 @@ import traceback
 from . import __version__
 from .helpers import Error, location_validator, archivename_validator, format_time, format_file_size, \
     parse_pattern, PathPrefixPattern, to_localtime, timestamp, \
-    get_cache_dir, prune_within, prune_split, \
+    get_cache_dir, prune_within, prune_split, bin_to_hex, safe_encode, \
     Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
     dir_is_tagged, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
     log_multi, PatternMatcher, ItemFormatter
@@ -739,7 +739,7 @@ class Archiver:
                 else:
                     write = sys.stdout.buffer.write
                 for item in archive.iter_items(lambda item: matcher.match(item[b'path'])):
-                    write(formatter.format_item(item).encode('utf-8', errors='surrogateescape'))
+                    write(safe_encode(formatter.format_item(item)))
         else:
             for archive_info in manifest.list_archive_infos(sort_by='ts'):
                 if args.prefix and not archive_info.name.startswith(args.prefix):
@@ -759,7 +759,7 @@ class Archiver:
 
         stats = archive.calc_stats(cache)
         print('Name:', archive.name)
-        print('Fingerprint: %s' % hexlify(archive.id).decode('ascii'))
+        print('Fingerprint: %s' % archive.fpr)
         print('Comment:', archive.metadata.get(b'comment', ''))
         print('Hostname:', archive.metadata[b'hostname'])
         print('Username:', archive.metadata[b'username'])
@@ -901,7 +901,7 @@ class Archiver:
         archive = Archive(repository, key, manifest, args.location.archive)
         for i, item_id in enumerate(archive.metadata[b'items']):
             _, data = key.decrypt(item_id, repository.get(item_id))
-            filename = '%06d_%s.items' % (i, hexlify(item_id).decode('ascii'))
+            filename = '%06d_%s.items' % (i, bin_to_hex(item_id))
             print('Dumping', filename)
             with open(filename, 'wb') as fd:
                 fd.write(data)

+ 8 - 8
borg/cache.py

@@ -3,14 +3,14 @@ from .remote import cache_if_remote
 from collections import namedtuple
 import os
 import stat
-from binascii import hexlify, unhexlify
+from binascii import unhexlify
 import shutil
 
 from .key import PlaintextKey
 from .logger import create_logger
 logger = create_logger()
 from .helpers import Error, get_cache_dir, decode_dict, int_to_bigint, \
-    bigint_to_int, format_file_size, yes
+    bigint_to_int, bin_to_hex, format_file_size, yes
 from .locking import UpgradableLock
 from .hashindex import ChunkIndex, ChunkIndexEntry
 
@@ -37,13 +37,13 @@ class Cache:
 
     @staticmethod
     def break_lock(repository, path=None):
-        path = path or os.path.join(get_cache_dir(), hexlify(repository.id).decode('ascii'))
+        path = path or os.path.join(get_cache_dir(), repository.id_str)
         UpgradableLock(os.path.join(path, 'lock'), exclusive=True).break_lock()
 
     @staticmethod
     def destroy(repository, path=None):
         """destroy the cache for ``repository`` or at ``path``"""
-        path = path or os.path.join(get_cache_dir(), hexlify(repository.id).decode('ascii'))
+        path = path or os.path.join(get_cache_dir(), repository.id_str)
         config = os.path.join(path, 'config')
         if os.path.exists(config):
             os.remove(config)  # kill config first
@@ -64,7 +64,7 @@ class Cache:
         self.repository = repository
         self.key = key
         self.manifest = manifest
-        self.path = path or os.path.join(get_cache_dir(), hexlify(repository.id).decode('ascii'))
+        self.path = path or os.path.join(get_cache_dir(), repository.id_str)
         self.do_files = do_files
         # Warn user before sending data to a never seen before unencrypted repository
         if not os.path.exists(self.path):
@@ -134,7 +134,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         config = configparser.ConfigParser(interpolation=None)
         config.add_section('cache')
         config.set('cache', 'version', '1')
-        config.set('cache', 'repository', hexlify(self.repository.id).decode('ascii'))
+        config.set('cache', 'repository', self.repository.id_str)
         config.set('cache', 'manifest', '')
         with open(os.path.join(self.path, 'config'), 'w') as fd:
             config.write(fd)
@@ -214,7 +214,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                     entry = FileCacheEntry(*msgpack.unpackb(item))
                     if entry.age < 10 and bigint_to_int(entry.mtime) < self._newest_mtime:
                         msgpack.pack((path_hash, entry), fd)
-        self.config.set('cache', 'manifest', hexlify(self.manifest.id).decode('ascii'))
+        self.config.set('cache', 'manifest', self.manifest.id_str)
         self.config.set('cache', 'timestamp', self.manifest.timestamp)
         self.config.set('cache', 'key_type', str(self.key.TYPE))
         self.config.set('cache', 'previous_location', self.repository._location.canonical_path())
@@ -257,7 +257,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         archive_path = os.path.join(self.path, 'chunks.archive.d')
 
         def mkpath(id, suffix=''):
-            id_hex = hexlify(id).decode('ascii')
+            id_hex = bin_to_hex(id)
             path = os.path.join(archive_path, id_hex + suffix)
             return path.encode('utf-8')
 

+ 9 - 1
borg/helpers.py

@@ -95,6 +95,10 @@ class Manifest:
         self.key = key
         self.repository = repository
 
+    @property
+    def id_str(self):
+        return bin_to_hex(self.id)
+
     @classmethod
     def load(cls, repository, key=None):
         from .key import key_factory
@@ -658,7 +662,7 @@ def format_archive(archive):
     return '%-36s %s [%s]' % (
         archive.name,
         format_time(to_localtime(archive.ts)),
-        hexlify(archive.id).decode('ascii'),
+        bin_to_hex(archive.id),
     )
 
 
@@ -731,6 +735,10 @@ def safe_encode(s, coding='utf-8', errors='surrogateescape'):
     return s.encode(coding, errors)
 
 
+def bin_to_hex(binary):
+    return hexlify(binary).decode('ascii')
+
+
 class Location:
     """Object representing a repository / archive location
     """

+ 5 - 6
borg/key.py

@@ -1,4 +1,4 @@
-from binascii import hexlify, a2b_base64, b2a_base64
+from binascii import a2b_base64, b2a_base64
 import configparser
 import getpass
 import os
@@ -7,7 +7,7 @@ import textwrap
 from hmac import compare_digest
 from hashlib import sha256, pbkdf2_hmac
 
-from .helpers import Chunk, IntegrityError, get_keys_dir, Error, yes
+from .helpers import Chunk, IntegrityError, get_keys_dir, Error, yes, bin_to_hex
 from .logger import create_logger
 logger = create_logger()
 
@@ -203,7 +203,7 @@ class Passphrase(str):
                 passphrase.encode('ascii')
             except UnicodeEncodeError:
                 print('Your passphrase (UTF-8 encoding in hex): %s' %
-                      hexlify(passphrase.encode('utf-8')).decode('ascii'),
+                      bin_to_hex(passphrase.encode('utf-8')),
                       file=sys.stderr)
                 print('As you have a non-ASCII passphrase, it is recommended to keep the UTF-8 encoding in hex together with the passphrase at a safe place.',
                       file=sys.stderr)
@@ -397,13 +397,12 @@ class KeyfileKey(KeyfileKeyBase):
     FILE_ID = 'BORG_KEY'
 
     def find_key(self):
-        id = hexlify(self.repository.id).decode('ascii')
         keys_dir = get_keys_dir()
         for name in os.listdir(keys_dir):
             filename = os.path.join(keys_dir, name)
             with open(filename, 'r') as fd:
                 line = fd.readline().strip()
-                if line.startswith(self.FILE_ID) and line[len(self.FILE_ID) + 1:] == id:
+                if line.startswith(self.FILE_ID) and line[len(self.FILE_ID) + 1:] == self.repository.id_str:
                     return filename
         raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir())
 
@@ -427,7 +426,7 @@ class KeyfileKey(KeyfileKeyBase):
     def save(self, target, passphrase):
         key_data = self._save(passphrase)
         with open(target, 'w') as fd:
-            fd.write('%s %s\n' % (self.FILE_ID, hexlify(self.repository_id).decode('ascii')))
+            fd.write('%s %s\n' % (self.FILE_ID, bin_to_hex(self.repository_id)))
             fd.write(key_data)
             fd.write('\n')
         self.target = target

+ 5 - 1
borg/remote.py

@@ -10,7 +10,7 @@ import tempfile
 
 from . import __version__
 
-from .helpers import Error, IntegrityError, get_home_dir, sysinfo
+from .helpers import Error, IntegrityError, get_home_dir, sysinfo, bin_to_hex
 from .repository import Repository
 
 import msgpack
@@ -191,6 +191,10 @@ class RemoteRepository:
             self.rollback()
         self.close()
 
+    @property
+    def id_str(self):
+        return bin_to_hex(self.id)
+
     def borg_cmd(self, args, testing):
         """return a borg serve command line"""
         # give some args/options to "borg serve" process as they were given to us

+ 7 - 3
borg/repository.py

@@ -1,5 +1,5 @@
 from configparser import ConfigParser
-from binascii import hexlify, unhexlify
+from binascii import unhexlify
 from datetime import datetime
 from itertools import islice
 import errno
@@ -13,7 +13,7 @@ from zlib import crc32
 
 import msgpack
 from .constants import *  # NOQA
-from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent
+from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent, bin_to_hex
 from .hashindex import NSIndex
 from .locking import UpgradableLock, LockError, LockErrorT
 from .lrucache import LRUCache
@@ -83,6 +83,10 @@ class Repository:
             self.rollback()
         self.close()
 
+    @property
+    def id_str(self):
+        return bin_to_hex(self.id)
+
     def create(self, path):
         """Create a new empty repository at `path`
         """
@@ -99,7 +103,7 @@ class Repository:
         config.set('repository', 'segments_per_dir', str(DEFAULT_SEGMENTS_PER_DIR))
         config.set('repository', 'max_segment_size', str(DEFAULT_MAX_SEGMENT_SIZE))
         config.set('repository', 'append_only', '0')
-        config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii'))
+        config.set('repository', 'id', bin_to_hex(os.urandom(32)))
         self.save_config(path, config)
 
     def save_config(self, path, config):

+ 2 - 3
borg/testsuite/archiver.py

@@ -1,4 +1,3 @@
-from binascii import hexlify
 from configparser import ConfigParser
 import errno
 import os
@@ -23,7 +22,7 @@ from ..archiver import Archiver
 from ..cache import Cache
 from ..constants import *  # NOQA
 from ..crypto import bytes_to_long, num_aes_blocks
-from ..helpers import Chunk, Manifest, EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
+from ..helpers import Chunk, Manifest, EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, bin_to_hex
 from ..key import KeyfileKeyBase
 from ..remote import RemoteRepository, PathNotAllowed
 from ..repository import Repository
@@ -377,7 +376,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
     def _set_repository_id(self, path, id):
         config = ConfigParser(interpolation=None)
         config.read(os.path.join(path, 'config'))
-        config.set('repository', 'id', hexlify(id).decode('ascii'))
+        config.set('repository', 'id', bin_to_hex(id))
         with open(os.path.join(path, 'config'), 'w') as fd:
             config.write(fd)
         with Repository(self.repository_path) as repository:

+ 6 - 1
borg/testsuite/helpers.py

@@ -13,7 +13,7 @@ import time
 from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, \
     prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, is_slow_msgpack, \
     yes, TRUISH, FALSISH, DEFAULTISH, \
-    StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, Chunk, \
+    StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, CompressionSpec, ChunkerParams, Chunk, \
     ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
     PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format, ChunkIteratorFileWrapper
 from . import BaseTestCase, environment_variable, FakeInputs
@@ -31,6 +31,11 @@ class BigIntTestCase(BaseTestCase):
         self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70)
 
 
+def test_bin_to_hex():
+    assert bin_to_hex(b'') == ''
+    assert bin_to_hex(b'\x00\x01\xff') == '0001ff'
+
+
 class TestLocationWithoutEnv:
     def test_ssh(self, monkeypatch):
         monkeypatch.delenv('BORG_REPO', raising=False)

+ 2 - 1
borg/testsuite/key.py

@@ -6,7 +6,7 @@ from binascii import hexlify, unhexlify
 
 from ..crypto import bytes_to_long, num_aes_blocks
 from ..key import PlaintextKey, PassphraseKey, KeyfileKey
-from ..helpers import Location, Chunk
+from ..helpers import Location, Chunk, bin_to_hex
 from . import BaseTestCase
 
 
@@ -44,6 +44,7 @@ class KeyTestCase(BaseTestCase):
 
         _location = _Location()
         id = bytes(32)
+        id_str = bin_to_hex(id)
 
     def test_plaintext(self):
         key = PlaintextKey.create(None, None)

+ 4 - 7
borg/upgrader.py

@@ -1,4 +1,3 @@
-from binascii import hexlify
 import datetime
 import logging
 logger = logging.getLogger(__name__)
@@ -189,8 +188,8 @@ class AtticRepositoryUpgrader(Repository):
         attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR',
                                          os.path.join(get_home_dir(),
                                                       '.cache', 'attic'))
-        attic_cache_dir = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'))
-        borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii'))
+        attic_cache_dir = os.path.join(attic_cache_dir, self.id_str)
+        borg_cache_dir = os.path.join(get_cache_dir(), self.id_str)
 
         def copy_cache_file(path):
             """copy the given attic cache path into the borg directory
@@ -264,7 +263,6 @@ class AtticKeyfileKey(KeyfileKey):
            assume the repository has been opened by the archiver yet
         """
         get_keys_dir = cls.get_keys_dir
-        id = hexlify(repository.id).decode('ascii')
         keys_dir = get_keys_dir()
         if not os.path.exists(keys_dir):
             raise KeyfileNotFoundError(repository.path, keys_dir)
@@ -272,7 +270,7 @@ class AtticKeyfileKey(KeyfileKey):
             filename = os.path.join(keys_dir, name)
             with open(filename, 'r') as fd:
                 line = fd.readline().strip()
-                if line and line.startswith(cls.FILE_ID) and line[10:] == id:
+                if line and line.startswith(cls.FILE_ID) and line[10:] == repository.id_str:
                     return filename
         raise KeyfileNotFoundError(repository.path, keys_dir)
 
@@ -314,7 +312,6 @@ class Borg0xxKeyfileKey(KeyfileKey):
     @classmethod
     def find_key_file(cls, repository):
         get_keys_dir = cls.get_keys_dir
-        id = hexlify(repository.id).decode('ascii')
         keys_dir = get_keys_dir()
         if not os.path.exists(keys_dir):
             raise KeyfileNotFoundError(repository.path, keys_dir)
@@ -322,6 +319,6 @@ class Borg0xxKeyfileKey(KeyfileKey):
             filename = os.path.join(keys_dir, name)
             with open(filename, 'r') as fd:
                 line = fd.readline().strip()
-                if line and line.startswith(cls.FILE_ID) and line[len(cls.FILE_ID) + 1:] == id:
+                if line and line.startswith(cls.FILE_ID) and line[len(cls.FILE_ID) + 1:] == repository.id_str:
                     return filename
         raise KeyfileNotFoundError(repository.path, keys_dir)