Kaynağa Gözat

more safe interface for manifest.archives

Thomas Waldmann 8 yıl önce
ebeveyn
işleme
1f056d9e8a
5 değiştirilmiş dosya ile 99 ekleme ve 45 silme
  1. 20 18
      src/borg/archive.py
  2. 4 4
      src/borg/archiver.py
  3. 4 4
      src/borg/cache.py
  4. 3 3
      src/borg/fuse.py
  5. 68 16
      src/borg/helpers.py

+ 20 - 18
src/borg/archive.py

@@ -269,10 +269,10 @@ class Archive:
                     break
                 i += 1
         else:
-            if name not in self.manifest.archives:
+            info = self.manifest.archives.get(name)
+            if info is None:
                 raise self.DoesNotExist(name)
-            info = self.manifest.archives[name]
-            self.load(info[b'id'])
+            self.load(info.id)
             self.zeros = b'\0' * (1 << chunker_params[1])
 
     def _load_meta(self, id):
@@ -379,7 +379,7 @@ Number of files: {0.stats.nfiles}'''.format(
         data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape')
         self.id = self.key.id_hash(data)
         self.cache.add_chunk(self.id, Chunk(data), self.stats)
-        self.manifest.archives[name] = {'id': self.id, 'time': metadata.time}
+        self.manifest.archives[name] = (self.id, metadata.time)
         self.manifest.write()
         self.repository.commit()
         self.cache.commit()
@@ -593,7 +593,7 @@ Number of files: {0.stats.nfiles}'''.format(
         data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape')
         new_id = self.key.id_hash(data)
         self.cache.add_chunk(new_id, Chunk(data), self.stats)
-        self.manifest.archives[self.name] = {'id': new_id, 'time': metadata.time}
+        self.manifest.archives[self.name] = (new_id, metadata.time)
         self.cache.chunk_decref(self.id, self.stats)
         self.id = new_id
 
@@ -844,7 +844,7 @@ Number of files: {0.stats.nfiles}'''.format(
     @staticmethod
     def list_archives(repository, key, manifest, cache=None):
         # expensive! see also Manifest.list_archive_infos.
-        for name, info in manifest.archives.items():
+        for name in manifest.archives:
             yield Archive(repository, key, manifest, name, cache=cache)
 
     @staticmethod
@@ -1077,7 +1077,7 @@ class ArchiveChecker:
             if valid_archive(archive):
                 archive = ArchiveItem(internal_dict=archive)
                 logger.info('Found archive %s', archive.name)
-                manifest.archives[archive.name] = {b'id': chunk_id, b'time': archive.time}
+                manifest.archives[archive.name] = (chunk_id, archive.time)
         logger.info('Manifest rebuild complete.')
         return manifest
 
@@ -1216,28 +1216,30 @@ class ArchiveChecker:
 
         if archive is None:
             # we need last N or all archives
-            archive_items = sorted(self.manifest.archives.items(), reverse=True,
-                                   key=lambda name_info: name_info[1][b'time'])
+            archive_infos = self.manifest.archives.list(sort_by='ts', reverse=True)
             if prefix is not None:
-                archive_items = [item for item in archive_items if item[0].startswith(prefix)]
-            num_archives = len(archive_items)
+                archive_infos = [info for info in archive_infos if info.name.startswith(prefix)]
+            num_archives = len(archive_infos)
             end = None if last is None else min(num_archives, last)
         else:
             # we only want one specific archive
-            archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
-            if not archive_items:
+            info = self.manifest.archives.get(archive)
+            if info is None:
                 logger.error("Archive '%s' not found.", archive)
+                archive_infos = []
+            else:
+                archive_infos = [info]
             num_archives = 1
             end = 1
 
         with cache_if_remote(self.repository) as repository:
-            for i, (name, info) in enumerate(archive_items[:end]):
-                logger.info('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives))
-                archive_id = info[b'id']
+            for i, info in enumerate(archive_infos[:end]):
+                logger.info('Analyzing archive {} ({}/{})'.format(info.name, num_archives - i, num_archives))
+                archive_id = info.id
                 if archive_id not in self.chunks:
                     logger.error('Archive metadata block is missing!')
                     self.error_found = True
-                    del self.manifest.archives[name]
+                    del self.manifest.archives[info.name]
                     continue
                 mark_as_possibly_superseded(archive_id)
                 cdata = self.repository.get(archive_id)
@@ -1260,7 +1262,7 @@ class ArchiveChecker:
                 new_archive_id = self.key.id_hash(data)
                 cdata = self.key.encrypt(Chunk(data))
                 add_reference(new_archive_id, len(data), len(cdata), cdata)
-                info[b'id'] = new_archive_id
+                self.manifest.archives[info.name] = (new_archive_id, info.ts)
 
     def orphan_chunks_check(self):
         if self.check_all:

+ 4 - 4
src/borg/archiver.py

@@ -734,7 +734,7 @@ class Archiver:
                     msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.")
                 else:
                     msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
-                    for archive_info in manifest.list_archive_infos(sort_by='ts'):
+                    for archive_info in manifest.archives.list(sort_by='ts'):
                         msg.append(format_archive(archive_info))
                 msg.append("Type 'YES' if you understand this and want to continue: ")
                 msg = '\n'.join(msg)
@@ -812,7 +812,7 @@ class Archiver:
                 format = "{archive:<36} {time} [{id}]{NL}"
             formatter = ArchiveFormatter(format)
 
-            for archive_info in manifest.list_archive_infos(sort_by='ts'):
+            for archive_info in manifest.archives.list(sort_by='ts'):
                 if args.prefix and not archive_info.name.startswith(args.prefix):
                     continue
                 write(safe_encode(formatter.format_item(archive_info)))
@@ -857,7 +857,7 @@ class Archiver:
                              '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
                              '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
             return self.exit_code
-        archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True)  # just a ArchiveInfo list
+        archives_checkpoints = manifest.archives.list(sort_by='ts', reverse=True)  # just a ArchiveInfo list
         if args.prefix:
             archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
         is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
@@ -974,7 +974,7 @@ class Archiver:
                 if args.target is not None:
                     self.print_error('--target: Need to specify single archive')
                     return self.exit_code
-                for archive in manifest.list_archive_infos(sort_by='ts'):
+                for archive in manifest.archives.list(sort_by='ts'):
                     name = archive.name
                     if recreater.is_temporary_archive(name):
                         continue

+ 4 - 4
src/borg/cache.py

@@ -279,7 +279,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                 return set()
 
         def repo_archives():
-            return set(info[b'id'] for info in self.manifest.archives.values())
+            return set(info.id for info in self.manifest.archives.list())
 
         def cleanup_outdated(ids):
             for id in ids:
@@ -318,9 +318,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             return chunk_idx
 
         def lookup_name(archive_id):
-            for name, info in self.manifest.archives.items():
-                if info[b'id'] == archive_id:
-                    return name
+            for info in self.manifest.archives.list():
+                if info.id == archive_id:
+                    return info.name
 
         def create_master_idx(chunk_idx):
             logger.info('Synchronizing chunks cache...')

+ 3 - 3
src/borg/fuse.py

@@ -73,11 +73,11 @@ class FuseOperations(llfuse.Operations):
         if archive:
             self.process_archive(archive)
         else:
-            for archive_name in manifest.archives:
+            for name in manifest.archives:
                 # Create archive placeholder inode
                 archive_inode = self._create_dir(parent=1)
-                self.contents[1][os.fsencode(archive_name)] = archive_inode
-                self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name)
+                self.contents[1][os.fsencode(name)] = archive_inode
+                self.pending_archives[archive_inode] = Archive(repository, key, manifest, name)
 
     def mount(self, mountpoint, mount_options, foreground=False):
         """Mount filesystem on *mountpoint* with *mount_options*."""

+ 68 - 16
src/borg/helpers.py

@@ -18,7 +18,7 @@ import time
 import unicodedata
 import uuid
 from binascii import hexlify
-from collections import namedtuple, deque
+from collections import namedtuple, deque, abc
 from contextlib import contextmanager
 from datetime import datetime, timezone, timedelta
 from fnmatch import translate
@@ -97,12 +97,76 @@ def check_extension_modules():
         raise ExtensionModuleError
 
 
+ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
+
+
+class Archives(abc.MutableMapping):
+    """
+    Nice wrapper around the archives dict, making sure only valid types/values get in
+    and we can deal with str keys (and it internally encodes to byte keys) and eiter
+    str timestamps or datetime timestamps.
+    """
+    def __init__(self):
+        # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts)
+        self._archives = {}
+
+    def __len__(self):
+        return len(self._archives)
+
+    def __iter__(self):
+        return iter(safe_decode(name) for name in self._archives)
+
+    def __getitem__(self, name):
+        assert isinstance(name, str)
+        _name = safe_encode(name)
+        values = self._archives.get(_name)
+        if values is None:
+            raise KeyError
+        ts = parse_timestamp(values[b'time'].decode('utf-8'))
+        return ArchiveInfo(name=name, id=values[b'id'], ts=ts)
+
+    def __setitem__(self, name, info):
+        assert isinstance(name, str)
+        name = safe_encode(name)
+        assert isinstance(info, tuple)
+        id, ts = info
+        assert isinstance(id, bytes)
+        if isinstance(ts, datetime):
+            ts = ts.replace(tzinfo=None).isoformat()
+        assert isinstance(ts, str)
+        ts = ts.encode()
+        self._archives[name] = {b'id': id, b'time': ts}
+
+    def __delitem__(self, name):
+        assert isinstance(name, str)
+        name = safe_encode(name)
+        del self._archives[name]
+
+    def list(self, sort_by=None, reverse=False):
+        # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
+        archives = self.values()  # [self[name] for name in self]
+        if sort_by is not None:
+            archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
+        return archives
+
+    def set_raw_dict(self, d):
+        """set the dict we get from the msgpack unpacker"""
+        for k, v in d.items():
+            assert isinstance(k, bytes)
+            assert isinstance(v, dict) and b'id' in v and b'time' in v
+            self._archives[k] = v
+
+    def get_raw_dict(self):
+        """get the dict we can give to the msgpack packer"""
+        return self._archives
+
+
 class Manifest:
 
     MANIFEST_ID = b'\0' * 32
 
     def __init__(self, key, repository, item_keys=None):
-        self.archives = {}
+        self.archives = Archives()
         self.config = {}
         self.key = key
         self.repository = repository
@@ -129,7 +193,7 @@ class Manifest:
         m = ManifestItem(internal_dict=msgpack.unpackb(data))
         if m.get('version') != 1:
             raise ValueError('Invalid manifest version')
-        manifest.archives = {safe_decode(k): v for k, v in m.archives.items()}
+        manifest.archives.set_raw_dict(m.archives)
         manifest.timestamp = m.get('timestamp')
         manifest.config = m.config
         # valid item keys are whatever is known in the repo or every key we know
@@ -141,7 +205,7 @@ class Manifest:
         self.timestamp = datetime.utcnow().isoformat()
         manifest = ManifestItem(
             version=1,
-            archives=self.archives,
+            archives=self.archives.get_raw_dict(),
             timestamp=self.timestamp,
             config=self.config,
             item_keys=tuple(self.item_keys),
@@ -150,18 +214,6 @@ class Manifest:
         self.id = self.key.id_hash(data)
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data)))
 
-    def list_archive_infos(self, sort_by=None, reverse=False):
-        # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
-        ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
-        archives = []
-        for name, values in self.archives.items():
-            ts = parse_timestamp(values[b'time'].decode('utf-8'))
-            id = values[b'id']
-            archives.append(ArchiveInfo(name=name, id=id, ts=ts))
-        if sort_by is not None:
-            archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
-        return archives
-
 
 def prune_within(archives, within):
     multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365}