Browse Source

safer interface for manifest.archives

Thomas Waldmann 8 years ago
parent
commit
1f056d9e8a
5 changed files with 99 additions and 45 deletions
  1. 20 18
      src/borg/archive.py
  2. 4 4
      src/borg/archiver.py
  3. 4 4
      src/borg/cache.py
  4. 3 3
      src/borg/fuse.py
  5. 68 16
      src/borg/helpers.py

+ 20 - 18
src/borg/archive.py

@@ -269,10 +269,10 @@ class Archive:
                     break
                     break
                 i += 1
                 i += 1
         else:
         else:
-            if name not in self.manifest.archives:
+            info = self.manifest.archives.get(name)
+            if info is None:
                 raise self.DoesNotExist(name)
                 raise self.DoesNotExist(name)
-            info = self.manifest.archives[name]
-            self.load(info[b'id'])
+            self.load(info.id)
             self.zeros = b'\0' * (1 << chunker_params[1])
             self.zeros = b'\0' * (1 << chunker_params[1])
 
 
     def _load_meta(self, id):
     def _load_meta(self, id):
@@ -379,7 +379,7 @@ Number of files: {0.stats.nfiles}'''.format(
         data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape')
         data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape')
         self.id = self.key.id_hash(data)
         self.id = self.key.id_hash(data)
         self.cache.add_chunk(self.id, Chunk(data), self.stats)
         self.cache.add_chunk(self.id, Chunk(data), self.stats)
-        self.manifest.archives[name] = {'id': self.id, 'time': metadata.time}
+        self.manifest.archives[name] = (self.id, metadata.time)
         self.manifest.write()
         self.manifest.write()
         self.repository.commit()
         self.repository.commit()
         self.cache.commit()
         self.cache.commit()
@@ -593,7 +593,7 @@ Number of files: {0.stats.nfiles}'''.format(
         data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape')
         data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape')
         new_id = self.key.id_hash(data)
         new_id = self.key.id_hash(data)
         self.cache.add_chunk(new_id, Chunk(data), self.stats)
         self.cache.add_chunk(new_id, Chunk(data), self.stats)
-        self.manifest.archives[self.name] = {'id': new_id, 'time': metadata.time}
+        self.manifest.archives[self.name] = (new_id, metadata.time)
         self.cache.chunk_decref(self.id, self.stats)
         self.cache.chunk_decref(self.id, self.stats)
         self.id = new_id
         self.id = new_id
 
 
@@ -844,7 +844,7 @@ Number of files: {0.stats.nfiles}'''.format(
     @staticmethod
     @staticmethod
     def list_archives(repository, key, manifest, cache=None):
     def list_archives(repository, key, manifest, cache=None):
         # expensive! see also Manifest.list_archive_infos.
         # expensive! see also Manifest.list_archive_infos.
-        for name, info in manifest.archives.items():
+        for name in manifest.archives:
             yield Archive(repository, key, manifest, name, cache=cache)
             yield Archive(repository, key, manifest, name, cache=cache)
 
 
     @staticmethod
     @staticmethod
@@ -1077,7 +1077,7 @@ class ArchiveChecker:
             if valid_archive(archive):
             if valid_archive(archive):
                 archive = ArchiveItem(internal_dict=archive)
                 archive = ArchiveItem(internal_dict=archive)
                 logger.info('Found archive %s', archive.name)
                 logger.info('Found archive %s', archive.name)
-                manifest.archives[archive.name] = {b'id': chunk_id, b'time': archive.time}
+                manifest.archives[archive.name] = (chunk_id, archive.time)
         logger.info('Manifest rebuild complete.')
         logger.info('Manifest rebuild complete.')
         return manifest
         return manifest
 
 
@@ -1216,28 +1216,30 @@ class ArchiveChecker:
 
 
         if archive is None:
         if archive is None:
             # we need last N or all archives
             # we need last N or all archives
-            archive_items = sorted(self.manifest.archives.items(), reverse=True,
-                                   key=lambda name_info: name_info[1][b'time'])
+            archive_infos = self.manifest.archives.list(sort_by='ts', reverse=True)
             if prefix is not None:
             if prefix is not None:
-                archive_items = [item for item in archive_items if item[0].startswith(prefix)]
-            num_archives = len(archive_items)
+                archive_infos = [info for info in archive_infos if info.name.startswith(prefix)]
+            num_archives = len(archive_infos)
             end = None if last is None else min(num_archives, last)
             end = None if last is None else min(num_archives, last)
         else:
         else:
             # we only want one specific archive
             # we only want one specific archive
-            archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
-            if not archive_items:
+            info = self.manifest.archives.get(archive)
+            if info is None:
                 logger.error("Archive '%s' not found.", archive)
                 logger.error("Archive '%s' not found.", archive)
+                archive_infos = []
+            else:
+                archive_infos = [info]
             num_archives = 1
             num_archives = 1
             end = 1
             end = 1
 
 
         with cache_if_remote(self.repository) as repository:
         with cache_if_remote(self.repository) as repository:
-            for i, (name, info) in enumerate(archive_items[:end]):
-                logger.info('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives))
-                archive_id = info[b'id']
+            for i, info in enumerate(archive_infos[:end]):
+                logger.info('Analyzing archive {} ({}/{})'.format(info.name, num_archives - i, num_archives))
+                archive_id = info.id
                 if archive_id not in self.chunks:
                 if archive_id not in self.chunks:
                     logger.error('Archive metadata block is missing!')
                     logger.error('Archive metadata block is missing!')
                     self.error_found = True
                     self.error_found = True
-                    del self.manifest.archives[name]
+                    del self.manifest.archives[info.name]
                     continue
                     continue
                 mark_as_possibly_superseded(archive_id)
                 mark_as_possibly_superseded(archive_id)
                 cdata = self.repository.get(archive_id)
                 cdata = self.repository.get(archive_id)
@@ -1260,7 +1262,7 @@ class ArchiveChecker:
                 new_archive_id = self.key.id_hash(data)
                 new_archive_id = self.key.id_hash(data)
                 cdata = self.key.encrypt(Chunk(data))
                 cdata = self.key.encrypt(Chunk(data))
                 add_reference(new_archive_id, len(data), len(cdata), cdata)
                 add_reference(new_archive_id, len(data), len(cdata), cdata)
-                info[b'id'] = new_archive_id
+                self.manifest.archives[info.name] = (new_archive_id, info.ts)
 
 
     def orphan_chunks_check(self):
     def orphan_chunks_check(self):
         if self.check_all:
         if self.check_all:

+ 4 - 4
src/borg/archiver.py

@@ -734,7 +734,7 @@ class Archiver:
                     msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.")
                     msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.")
                 else:
                 else:
                     msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
                     msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
-                    for archive_info in manifest.list_archive_infos(sort_by='ts'):
+                    for archive_info in manifest.archives.list(sort_by='ts'):
                         msg.append(format_archive(archive_info))
                         msg.append(format_archive(archive_info))
                 msg.append("Type 'YES' if you understand this and want to continue: ")
                 msg.append("Type 'YES' if you understand this and want to continue: ")
                 msg = '\n'.join(msg)
                 msg = '\n'.join(msg)
@@ -812,7 +812,7 @@ class Archiver:
                 format = "{archive:<36} {time} [{id}]{NL}"
                 format = "{archive:<36} {time} [{id}]{NL}"
             formatter = ArchiveFormatter(format)
             formatter = ArchiveFormatter(format)
 
 
-            for archive_info in manifest.list_archive_infos(sort_by='ts'):
+            for archive_info in manifest.archives.list(sort_by='ts'):
                 if args.prefix and not archive_info.name.startswith(args.prefix):
                 if args.prefix and not archive_info.name.startswith(args.prefix):
                     continue
                     continue
                 write(safe_encode(formatter.format_item(archive_info)))
                 write(safe_encode(formatter.format_item(archive_info)))
@@ -857,7 +857,7 @@ class Archiver:
                              '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
                              '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
                              '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
                              '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
             return self.exit_code
             return self.exit_code
-        archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True)  # just an ArchiveInfo list
+        archives_checkpoints = manifest.archives.list(sort_by='ts', reverse=True)  # just an ArchiveInfo list
         if args.prefix:
         if args.prefix:
             archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
             archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
         is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
         is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
@@ -974,7 +974,7 @@ class Archiver:
                 if args.target is not None:
                 if args.target is not None:
                     self.print_error('--target: Need to specify single archive')
                     self.print_error('--target: Need to specify single archive')
                     return self.exit_code
                     return self.exit_code
-                for archive in manifest.list_archive_infos(sort_by='ts'):
+                for archive in manifest.archives.list(sort_by='ts'):
                     name = archive.name
                     name = archive.name
                     if recreater.is_temporary_archive(name):
                     if recreater.is_temporary_archive(name):
                         continue
                         continue

+ 4 - 4
src/borg/cache.py

@@ -279,7 +279,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                 return set()
                 return set()
 
 
         def repo_archives():
         def repo_archives():
-            return set(info[b'id'] for info in self.manifest.archives.values())
+            return set(info.id for info in self.manifest.archives.list())
 
 
         def cleanup_outdated(ids):
         def cleanup_outdated(ids):
             for id in ids:
             for id in ids:
@@ -318,9 +318,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             return chunk_idx
             return chunk_idx
 
 
         def lookup_name(archive_id):
         def lookup_name(archive_id):
-            for name, info in self.manifest.archives.items():
-                if info[b'id'] == archive_id:
-                    return name
+            for info in self.manifest.archives.list():
+                if info.id == archive_id:
+                    return info.name
 
 
         def create_master_idx(chunk_idx):
         def create_master_idx(chunk_idx):
             logger.info('Synchronizing chunks cache...')
             logger.info('Synchronizing chunks cache...')

+ 3 - 3
src/borg/fuse.py

@@ -73,11 +73,11 @@ class FuseOperations(llfuse.Operations):
         if archive:
         if archive:
             self.process_archive(archive)
             self.process_archive(archive)
         else:
         else:
-            for archive_name in manifest.archives:
+            for name in manifest.archives:
                 # Create archive placeholder inode
                 # Create archive placeholder inode
                 archive_inode = self._create_dir(parent=1)
                 archive_inode = self._create_dir(parent=1)
-                self.contents[1][os.fsencode(archive_name)] = archive_inode
-                self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name)
+                self.contents[1][os.fsencode(name)] = archive_inode
+                self.pending_archives[archive_inode] = Archive(repository, key, manifest, name)
 
 
     def mount(self, mountpoint, mount_options, foreground=False):
     def mount(self, mountpoint, mount_options, foreground=False):
         """Mount filesystem on *mountpoint* with *mount_options*."""
         """Mount filesystem on *mountpoint* with *mount_options*."""

+ 68 - 16
src/borg/helpers.py

@@ -18,7 +18,7 @@ import time
 import unicodedata
 import unicodedata
 import uuid
 import uuid
 from binascii import hexlify
 from binascii import hexlify
-from collections import namedtuple, deque
+from collections import namedtuple, deque, abc
 from contextlib import contextmanager
 from contextlib import contextmanager
 from datetime import datetime, timezone, timedelta
 from datetime import datetime, timezone, timedelta
 from fnmatch import translate
 from fnmatch import translate
@@ -97,12 +97,76 @@ def check_extension_modules():
         raise ExtensionModuleError
         raise ExtensionModuleError
 
 
 
 
+ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
+
+
+class Archives(abc.MutableMapping):
+    """
+    Nice wrapper around the archives dict, making sure only valid types/values get in
+    and we can deal with str keys (and it internally encodes to byte keys) and either
+    str timestamps or datetime timestamps.
+    """
+    def __init__(self):
+        # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts)
+        self._archives = {}
+
+    def __len__(self):
+        return len(self._archives)
+
+    def __iter__(self):
+        return iter(safe_decode(name) for name in self._archives)
+
+    def __getitem__(self, name):
+        assert isinstance(name, str)
+        _name = safe_encode(name)
+        values = self._archives.get(_name)
+        if values is None:
+            raise KeyError
+        ts = parse_timestamp(values[b'time'].decode('utf-8'))
+        return ArchiveInfo(name=name, id=values[b'id'], ts=ts)
+
+    def __setitem__(self, name, info):
+        assert isinstance(name, str)
+        name = safe_encode(name)
+        assert isinstance(info, tuple)
+        id, ts = info
+        assert isinstance(id, bytes)
+        if isinstance(ts, datetime):
+            ts = ts.replace(tzinfo=None).isoformat()
+        assert isinstance(ts, str)
+        ts = ts.encode()
+        self._archives[name] = {b'id': id, b'time': ts}
+
+    def __delitem__(self, name):
+        assert isinstance(name, str)
+        name = safe_encode(name)
+        del self._archives[name]
+
+    def list(self, sort_by=None, reverse=False):
+        # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
+        archives = self.values()  # [self[name] for name in self]
+        if sort_by is not None:
+            archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
+        return archives
+
+    def set_raw_dict(self, d):
+        """set the dict we get from the msgpack unpacker"""
+        for k, v in d.items():
+            assert isinstance(k, bytes)
+            assert isinstance(v, dict) and b'id' in v and b'time' in v
+            self._archives[k] = v
+
+    def get_raw_dict(self):
+        """get the dict we can give to the msgpack packer"""
+        return self._archives
+
+
 class Manifest:
 class Manifest:
 
 
     MANIFEST_ID = b'\0' * 32
     MANIFEST_ID = b'\0' * 32
 
 
     def __init__(self, key, repository, item_keys=None):
     def __init__(self, key, repository, item_keys=None):
-        self.archives = {}
+        self.archives = Archives()
         self.config = {}
         self.config = {}
         self.key = key
         self.key = key
         self.repository = repository
         self.repository = repository
@@ -129,7 +193,7 @@ class Manifest:
         m = ManifestItem(internal_dict=msgpack.unpackb(data))
         m = ManifestItem(internal_dict=msgpack.unpackb(data))
         if m.get('version') != 1:
         if m.get('version') != 1:
             raise ValueError('Invalid manifest version')
             raise ValueError('Invalid manifest version')
-        manifest.archives = {safe_decode(k): v for k, v in m.archives.items()}
+        manifest.archives.set_raw_dict(m.archives)
         manifest.timestamp = m.get('timestamp')
         manifest.timestamp = m.get('timestamp')
         manifest.config = m.config
         manifest.config = m.config
         # valid item keys are whatever is known in the repo or every key we know
         # valid item keys are whatever is known in the repo or every key we know
@@ -141,7 +205,7 @@ class Manifest:
         self.timestamp = datetime.utcnow().isoformat()
         self.timestamp = datetime.utcnow().isoformat()
         manifest = ManifestItem(
         manifest = ManifestItem(
             version=1,
             version=1,
-            archives=self.archives,
+            archives=self.archives.get_raw_dict(),
             timestamp=self.timestamp,
             timestamp=self.timestamp,
             config=self.config,
             config=self.config,
             item_keys=tuple(self.item_keys),
             item_keys=tuple(self.item_keys),
@@ -150,18 +214,6 @@ class Manifest:
         self.id = self.key.id_hash(data)
         self.id = self.key.id_hash(data)
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data)))
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data)))
 
 
-    def list_archive_infos(self, sort_by=None, reverse=False):
-        # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
-        ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
-        archives = []
-        for name, values in self.archives.items():
-            ts = parse_timestamp(values[b'time'].decode('utf-8'))
-            id = values[b'id']
-            archives.append(ArchiveInfo(name=name, id=id, ts=ts))
-        if sort_by is not None:
-            archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
-        return archives
-
 
 
 def prune_within(archives, within):
 def prune_within(archives, within):
     multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365}
     multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365}