瀏覽代碼

Chunker: yield Chunk namedtuple instead of bytes/memoryview

Thomas Waldmann 4 年之前
父節點
當前提交
8c299696aa
共有 2 個文件被更改,包括 31 次插入4 次删除
  1. 28 4
      src/borg/chunker.pyx
  2. 3 0
      src/borg/constants.py

+ 28 - 4
src/borg/chunker.pyx

@@ -4,6 +4,9 @@ API_VERSION = '1.2_01'
 
 import errno
 import os
+from collections import namedtuple
+
+from .constants import CH_DATA, CH_HOLE
 
 from libc.stdlib cimport free
 
@@ -26,6 +29,25 @@ cdef extern from "_chunker.c":
 has_seek_hole = hasattr(os, 'SEEK_DATA') and hasattr(os, 'SEEK_HOLE')
 
 
+_Chunk = namedtuple('_Chunk', 'meta data')
+_Chunk.__doc__ = """\
+    Chunk namedtuple
+
+    meta is always a dictionary, data depends on allocation.
+
+    on disk data:
+        meta = {'allocation': CH_DATA, 'size': size_of_data}
+        data = read_data [bytes or memoryview]
+
+    hole in a sparse file:
+        meta = {'allocation': CH_HOLE, 'size': size_of_hole}
+        data = None
+"""
+
+def Chunk(data, **meta):
+    return _Chunk(meta, data)
+
+
 def dread(offset, size, fd=None, fh=-1):
     use_fh = fh >= 0
     if use_fh:
@@ -178,15 +200,16 @@ class ChunkerFixed:
                 if is_data:
                     # read block from the range
                     data = dread(offset, wanted, fd, fh)
+                    got = len(data)
                 else:  # hole
                     # seek over block from the range
                     pos = dseek(wanted, os.SEEK_CUR, fd, fh)
-                    data = self.zeros[:pos - offset]  # for now, create zero-bytes here
-                got = len(data)
+                    data = None
+                    got = pos - offset
                 if got > 0:
                     offset += got
                     range_size -= got
-                    yield data  # later, use a better api that tags data vs. hole
+                    yield Chunk(data, size=got, allocation=CH_DATA if is_data else CH_HOLE)
                 if got < wanted:
                     # we did not get enough data, looks like EOF.
                     return
@@ -233,7 +256,8 @@ cdef class Chunker:
         return self
 
     def __next__(self):
-        return chunker_process(self.chunker)
+        data = chunker_process(self.chunker)
+        return Chunk(data, size=len(data), allocation=CH_DATA)  # no sparse support here
 
 
 def get_chunker(algo, *params, **kw):

+ 3 - 0
src/borg/constants.py

@@ -75,6 +75,9 @@ CHUNKER_PARAMS = (CH_BUZHASH, CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH
 # chunker params for the items metadata stream, finer granularity
 ITEMS_CHUNKER_PARAMS = (CH_BUZHASH, 15, 19, 17, HASH_WINDOW_SIZE)
 
+# chunk allocation kinds: CH_DATA = normal on-disk data, CH_ALLOC = allocated (but not written, all zeros), CH_HOLE = not allocated hole (all zeros)
+CH_DATA, CH_ALLOC, CH_HOLE = 0, 1, 2
+
 # operating mode of the files cache (for fast skipping of unchanged files)
 DEFAULT_FILES_CACHE_MODE_UI = 'ctime,size,inode'
 DEFAULT_FILES_CACHE_MODE = 'cis'  # == CacheMode(DEFAULT_FILES_CACHE_MODE_UI)