
Merge pull request #918 from ThomasWaldmann/namedtuple2

better readability and less errors with namedtuples, fixes #823
TW 9 years ago
parent
commit
30f732052e
6 changed files with 55 additions and 48 deletions
  1. borg/archive.py            +15 -13
  2. borg/archiver.py            +7 -6
  3. borg/cache.py              +21 -21
  4. borg/hashindex.pyx          +6 -2
  5. borg/helpers.py             +5 -5
  6. borg/testsuite/archiver.py  +1 -1
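
The change replaces bare tuples with namedtuples throughout the chunk-handling code, so call sites read named fields instead of magic indices. A minimal standalone sketch of the pattern (the values are made up; only the ChunkListEntry definition mirrors the one added to borg/cache.py below):

from collections import namedtuple

ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')

chunk = ChunkListEntry(id=b'\x00' * 32, size=1024, csize=512)
assert chunk.size == chunk[1]            # still a tuple, so old index access keeps working
assert chunk == (chunk.id, 1024, 512)    # compares element-wise like a plain tuple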

+ 15 - 13
borg/archive.py

@@ -25,7 +25,8 @@ from .helpers import Error, uid2user, user2uid, gid2group, group2gid, \
 from .repository import Repository
 from .platform import acl_get, acl_set
 from .chunker import Chunker
-from .hashindex import ChunkIndex
+from .hashindex import ChunkIndex, ChunkIndexEntry
+from .cache import ChunkListEntry
 import msgpack
 
 ITEMS_BUFFER = 1024 * 1024
@@ -61,10 +62,13 @@ class DownloadPipeline:
             items = [decode_dict(item, (b'path', b'source', b'user', b'group')) for item in unpacker]
             if filter:
                 items = [item for item in items if filter(item)]
+            for item in items:
+                if b'chunks' in item:
+                    item[b'chunks'] = [ChunkListEntry(*e) for e in item[b'chunks']]
             if preload:
                 for item in items:
                     if b'chunks' in item:
-                        self.repository.preload([c[0] for c in item[b'chunks']])
+                        self.repository.preload([c.id for c in item[b'chunks']])
             for item in items:
                 yield item
 
@@ -318,7 +322,7 @@ Number of files: {0.stats.nfiles}'''.format(
         """
         if dry_run or stdout:
             if b'chunks' in item:
-                for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
+                for data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True):
                     if stdout:
                         sys.stdout.buffer.write(data)
                 if stdout:
@@ -361,7 +365,7 @@ Number of files: {0.stats.nfiles}'''.format(
                     return
                 # Extract chunks, since the item which had the chunks was not extracted
             with open(path, 'wb') as fd:
-                ids = [c[0] for c in item[b'chunks']]
+                ids = [c.id for c in item[b'chunks']]
                 for data in self.pipeline.fetch_many(ids, is_preloaded=True):
                     if sparse and self.zeros.startswith(data):
                         # all-zero chunk: create a hole in a sparse file
@@ -600,7 +604,7 @@ Number of files: {0.stats.nfiles}'''.format(
                     chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats))
                     if self.show_progress:
                         self.stats.show_progress(item=item, dt=0.2)
-            cache.memorize_file(path_hash, st, [c[0] for c in chunks])
+            cache.memorize_file(path_hash, st, [c.id for c in chunks])
             status = status or 'M'  # regular file, modified (if not 'A' already)
         item[b'chunks'] = chunks
         item.update(self.stat_attrs(st, path))
@@ -732,8 +736,9 @@ class ArchiveChecker:
             if not result:
                 break
             marker = result[-1]
+            init_entry = ChunkIndexEntry(refcount=0, size=0, csize=0)
             for id_ in result:
-                self.chunks[id_] = (0, 0, 0)
+                self.chunks[id_] = init_entry
 
     def identify_key(self, repository):
         cdata = repository.get(next(self.chunks.iteritems())[0])
@@ -775,7 +780,7 @@ class ArchiveChecker:
         del self.chunks[Manifest.MANIFEST_ID]
 
         def mark_as_possibly_superseded(id_):
-            if self.chunks.get(id_, (0,))[0] == 0:
+            if self.chunks.get(id_, ChunkIndexEntry(0, 0, 0)).refcount == 0:
                 self.possibly_superseded.add(id_)
 
         def add_callback(chunk):
@@ -789,7 +794,7 @@ class ArchiveChecker:
                 self.chunks.incref(id_)
             except KeyError:
                 assert cdata is not None
-                self.chunks[id_] = 1, size, csize
+                self.chunks[id_] = ChunkIndexEntry(refcount=1, size=size, csize=csize)
                 if self.repair:
                     self.repository.put(id_, cdata)
 
@@ -909,10 +914,7 @@ class ArchiveChecker:
 
     def orphan_chunks_check(self):
         if self.check_all:
-            unused = set()
-            for id_, (count, size, csize) in self.chunks.iteritems():
-                if count == 0:
-                    unused.add(id_)
+            unused = {id_ for id_, entry in self.chunks.iteritems() if entry.refcount == 0}
             orphaned = unused - self.possibly_superseded
             if orphaned:
                 logger.error('{} orphaned objects found!'.format(len(orphaned)))
@@ -1211,7 +1213,7 @@ class ArchiveRecreater:
         for item in old_target.iter_items():
             if b'chunks' in item:
                 for chunk in item[b'chunks']:
-                    self.cache.chunk_incref(chunk[0], target.stats)
+                    self.cache.chunk_incref(chunk.id, target.stats)
                 target.stats.nfiles += 1
             target.add_item(item)
         if item:
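
In borg/archive.py the conversion happens once, in DownloadPipeline: raw msgpack lists under item[b'chunks'] are wrapped into ChunkListEntry so every later consumer can write c.id instead of c[0]. A hedged sketch of just that step (the item dict is a stand-in, not real archive data; ChunkListEntry is the namedtuple added in borg/cache.py below):

from collections import namedtuple

ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')

item = {b'path': b'some/file', b'chunks': [(b'\x00' * 32, 1024, 512)]}
if b'chunks' in item:
    item[b'chunks'] = [ChunkListEntry(*e) for e in item[b'chunks']]
ids = [c.id for c in item[b'chunks']]    # e.g. the id list handed to repository.preload()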

+ 7 - 6
borg/archiver.py

@@ -32,6 +32,7 @@ from .cache import Cache
 from .key import key_creator, RepoKey, PassphraseKey
 from .archive import Archive, ArchiveChecker, ArchiveRecreater, CHUNKER_PARAMS
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
+from .hashindex import ChunkIndexEntry
 
 has_lchflags = hasattr(os, 'lchflags')
 
@@ -446,8 +447,8 @@ class Archiver:
             if item.get(b'deleted'):
                 return None
             else:
-                return sum(c[1] for c in item[b'chunks']
-                           if consider_ids is None or c[0] in consider_ids)
+                return sum(c.size for c in item[b'chunks']
+                           if consider_ids is None or c.id in consider_ids)
 
         def get_owner(item):
             if args.numeric_owner:
@@ -482,8 +483,8 @@ class Archiver:
                 if sum_chunk_size(item1) != sum_chunk_size(item2):
                     return True
                 else:
-                    chunk_ids1 = [c[0] for c in item1[b'chunks']]
-                    chunk_ids2 = [c[0] for c in item2[b'chunks']]
+                    chunk_ids1 = [c.id for c in item1[b'chunks']]
+                    chunk_ids2 = [c.id for c in item2[b'chunks']]
                     return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)
 
         def compare_content(path, item1, item2):
@@ -493,8 +494,8 @@ class Archiver:
                 elif item2.get(b'deleted'):
                     return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1))))
                 else:
-                    chunk_ids1 = {c[0] for c in item1[b'chunks']}
-                    chunk_ids2 = {c[0] for c in item2[b'chunks']}
+                    chunk_ids1 = {c.id for c in item1[b'chunks']}
+                    chunk_ids2 = {c.id for c in item2[b'chunks']}
                     added_ids = chunk_ids2 - chunk_ids1
                     removed_ids = chunk_ids1 - chunk_ids2
                     added = sum_chunk_size(item2, added_ids)

+ 21 - 21
borg/cache.py

@@ -12,10 +12,13 @@ logger = create_logger()
 from .helpers import Error, get_cache_dir, decode_dict, int_to_bigint, \
     bigint_to_int, format_file_size, yes
 from .locking import UpgradableLock
-from .hashindex import ChunkIndex
+from .hashindex import ChunkIndex, ChunkIndexEntry
 
 import msgpack
 
+ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')
+FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids')
+
 
 class Cache:
     """Client Side cache
@@ -183,9 +186,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                     break
                 u.feed(data)
                 for path_hash, item in u:
-                    item[0] += 1
+                    entry = FileCacheEntry(*item)
                     # in the end, this takes about 240 Bytes per file
-                    self.files[path_hash] = msgpack.packb(item)
+                    self.files[path_hash] = msgpack.packb(entry._replace(age=entry.age + 1))
 
     def begin_txn(self):
         # Initialize transaction snapshot
@@ -208,9 +211,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                 for path_hash, item in self.files.items():
                     # Discard cached files with the newest mtime to avoid
                     # issues with filesystem snapshots and mtime precision
-                    item = msgpack.unpackb(item)
-                    if item[0] < 10 and bigint_to_int(item[3]) < self._newest_mtime:
-                        msgpack.pack((path_hash, item), fd)
+                    entry = FileCacheEntry(*msgpack.unpackb(item))
+                    if entry.age < 10 and bigint_to_int(entry.mtime) < self._newest_mtime:
+                        msgpack.pack((path_hash, entry), fd)
         self.config.set('cache', 'manifest', hexlify(self.manifest.id).decode('ascii'))
         self.config.set('cache', 'timestamp', self.manifest.timestamp)
         self.config.set('cache', 'key_type', str(self.key.TYPE))
@@ -375,12 +378,12 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         data = self.key.encrypt(data)
         csize = len(data)
         self.repository.put(id, data, wait=False)
-        self.chunks[id] = (refcount + 1, size, csize)
+        self.chunks[id] = ChunkIndexEntry(refcount + 1, size, csize)
         stats.update(size, csize, True)
-        return id, size, csize
+        return ChunkListEntry(id, size, csize)
 
     def seen_chunk(self, id, size=None):
-        refcount, stored_size, _ = self.chunks.get(id, (0, None, None))
+        refcount, stored_size, _ = self.chunks.get(id, ChunkIndexEntry(0, None, None))
         if size is not None and stored_size is not None and size != stored_size:
             # we already have a chunk with that id, but different size.
             # this is either a hash collision (unlikely) or corruption or a bug.
@@ -393,7 +396,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             self.begin_txn()
         count, size, csize = self.chunks.incref(id)
         stats.update(size, csize, False)
-        return id, size, csize
+        return ChunkListEntry(id, size, csize)
 
     def chunk_decref(self, id, stats):
         if not self.txn_active:
@@ -414,20 +417,17 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         entry = self.files.get(path_hash)
         if not entry:
             return None
-        entry = msgpack.unpackb(entry)
-        if (entry[2] == st.st_size and bigint_to_int(entry[3]) == st.st_mtime_ns and
-                (ignore_inode or entry[1] == st.st_ino)):
-            # reset entry age
-            entry[0] = 0
-            self.files[path_hash] = msgpack.packb(entry)
-            return entry[4]
+        entry = FileCacheEntry(*msgpack.unpackb(entry))
+        if (entry.size == st.st_size and bigint_to_int(entry.mtime) == st.st_mtime_ns and
+                (ignore_inode or entry.inode == st.st_ino)):
+            self.files[path_hash] = msgpack.packb(entry._replace(age=0))
+            return entry.chunk_ids
         else:
             return None
 
     def memorize_file(self, path_hash, st, ids):
         if not (self.do_files and stat.S_ISREG(st.st_mode)):
             return
-        # Entry: Age, inode, size, mtime, chunk ids
-        mtime_ns = st.st_mtime_ns
-        self.files[path_hash] = msgpack.packb((0, st.st_ino, st.st_size, int_to_bigint(mtime_ns), ids))
-        self._newest_mtime = max(self._newest_mtime, mtime_ns)
+        entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=int_to_bigint(st.st_mtime_ns), chunk_ids=ids)
+        self.files[path_hash] = msgpack.packb(entry)
+        self._newest_mtime = max(self._newest_mtime, st.st_mtime_ns)
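
The files cache follows the same idea: FileCacheEntry is packed with msgpack (a namedtuple serializes as a plain array) and rebuilt with FileCacheEntry(*...) on load, with _replace() used to bump or reset the age field. A small sketch, assuming the msgpack-python package is available and using invented values:

import msgpack
from collections import namedtuple

FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids')

entry = FileCacheEntry(age=0, inode=1234, size=4096, mtime=0, chunk_ids=[])
packed = msgpack.packb(entry)                        # what gets stored under self.files[path_hash]
restored = FileCacheEntry(*msgpack.unpackb(packed))  # namedtuple restored from the plain array
aged = restored._replace(age=restored.age + 1)       # same trick the files-cache loader uses above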

+ 6 - 2
borg/hashindex.pyx

@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from collections import namedtuple
 import os
 
 cimport cython
@@ -184,6 +185,9 @@ cdef class NSKeyIterator:
         return (<char *>self.key)[:self.key_size], (segment, _le32toh(value[1]))
 
 
+ChunkIndexEntry = namedtuple('ChunkIndexEntry', 'refcount size csize')
+
+
 cdef class ChunkIndex(IndexBase):
     """
     Mapping of 32 byte keys to (refcount, size, csize), which are all 32-bit unsigned.
@@ -210,7 +214,7 @@ cdef class ChunkIndex(IndexBase):
             raise KeyError(key)
         cdef uint32_t refcount = _le32toh(data[0])
         assert refcount <= _MAX_VALUE
-        return refcount, _le32toh(data[1]), _le32toh(data[2])
+        return ChunkIndexEntry(refcount, _le32toh(data[1]), _le32toh(data[2]))
 
     def __setitem__(self, key, value):
         assert len(key) == self.key_size
@@ -342,4 +346,4 @@ cdef class ChunkKeyIterator:
         cdef uint32_t *value = <uint32_t *>(self.key + self.key_size)
         cdef uint32_t refcount = _le32toh(value[0])
         assert refcount <= MAX_VALUE, "invalid reference count"
-        return (<char *>self.key)[:self.key_size], (refcount, _le32toh(value[1]), _le32toh(value[2]))
+        return (<char *>self.key)[:self.key_size], ChunkIndexEntry(refcount, _le32toh(value[1]), _le32toh(value[2]))
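
ChunkIndexEntry is defined next to ChunkIndex in the Cython module and is what __getitem__ and the iterators now hand back, so callers can pass a named default to .get() and read .refcount directly. A sketch using an ordinary dict as a stand-in for a ChunkIndex (the real index maps 32-byte keys to these entries):

from collections import namedtuple

ChunkIndexEntry = namedtuple('ChunkIndexEntry', 'refcount size csize')

chunks = {b'\x01' * 32: ChunkIndexEntry(refcount=2, size=1024, csize=512)}

def is_unreferenced(chunks, id_):
    # mirrors mark_as_possibly_superseded() in the borg/archive.py hunk above
    return chunks.get(id_, ChunkIndexEntry(0, 0, 0)).refcount == 0

print(is_unreferenced(chunks, b'\x01' * 32))   # False
print(is_unreferenced(chunks, b'\x02' * 32))   # True: an unknown id counts as refcount 0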

+ 5 - 5
borg/helpers.py

@@ -1257,19 +1257,19 @@ class ItemFormatter:
 
     def calculate_unique_chunks(self, item):
         chunk_index = self.archive.cache.chunks
-        return sum(1 for chunk_id, _, _ in item.get(b'chunks', []) if chunk_index[chunk_id][0] == 1)
+        return sum(1 for c in item.get(b'chunks', []) if chunk_index[c.id].refcount == 1)
 
     def calculate_size(self, item):
-        return sum(size for _, size, _ in item.get(b'chunks', []))
+        return sum(c.size for c in item.get(b'chunks', []))
 
     def calculate_csize(self, item):
-        return sum(csize for _, _, csize in item.get(b'chunks', []))
+        return sum(c.csize for c in item.get(b'chunks', []))
 
     def hash_item(self, hash_function, item):
         if b'chunks' not in item:
             return ""
         hash = hashlib.new(hash_function)
-        for chunk in self.archive.pipeline.fetch_many([c[0] for c in item[b'chunks']]):
+        for chunk in self.archive.pipeline.fetch_many([c.id for c in item[b'chunks']]):
             hash.update(chunk)
         return hash.hexdigest()
 
@@ -1320,7 +1320,7 @@ class ChunkIteratorFileWrapper:
 
 def open_item(archive, item):
     """Return file-like object for archived item (with chunks)."""
-    chunk_iterator = archive.pipeline.fetch_many([c[0] for c in item[b'chunks']])
+    chunk_iterator = archive.pipeline.fetch_many([c.id for c in item[b'chunks']])
     return ChunkIteratorFileWrapper(chunk_iterator)
 
 

+ 1 - 1
borg/testsuite/archiver.py

@@ -1425,7 +1425,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
         with repository:
             for item in archive.iter_items():
                 if item[b'path'].endswith('testsuite/archiver.py'):
-                    repository.delete(item[b'chunks'][-1][0])
+                    repository.delete(item[b'chunks'][-1].id)
                     break
             repository.commit()
         self.cmd('check', self.repository_location, exit_code=1)