Jelajahi Sumber

Renamed chunker to chunkifier.

Jonas Borgström 15 tahun lalu
induk
melakukan
6c73f5dc86
2 mengubah file dengan 18 tambahan dan 15 penghapusan
  1. +12 −9
      dedupstore/archiver.py
  2. +6 −6
      dedupstore/chunkifier.py

+ 12 - 9
dedupstore/archiver.py

@@ -6,7 +6,7 @@ import struct
 import cPickle
 from optparse import OptionParser
 
-from chunker import chunker, checksum
+from chunkifier import chunkify, checksum
 from store import Store
 
 
@@ -70,12 +70,12 @@ class Cache(object):
 
     def add_chunk(self, data):
         sum = checksum(data)
+        data = zlib.compress(data)
         #print 'chunk %d: %d' % (len(data), sum)
-        hash = struct.pack('I', sum) + hashlib.sha1(data).digest()
-        if not self.seen_chunk(hash):
-            zdata = zlib.compress(data)
-            size = len(zdata)
-            self.store.put(NS_CHUNKS, hash, zdata)
+        id = struct.pack('I', sum) + hashlib.sha1(data).digest()
+        if not self.seen_chunk(id):
+            size = len(data)
+            self.store.put(NS_CHUNKS, id, data)
         else:
             size = 0
             #print 'seen chunk', hash.encode('hex')
@@ -164,7 +164,7 @@ class Archiver(object):
                 print item['path'], '...',
                 for chunk in item['chunks']:
                     data = self.store.get(NS_CHUNKS, chunk)
-                    if hashlib.sha1(data).digest() != chunk:
+                    if hashlib.sha1(data).digest() != chunk[4:]:
                         print 'ERROR'
                         break
                 else:
@@ -184,7 +184,10 @@ class Archiver(object):
             if item['type'] == 'FILE':
                 with open(item['path'], 'wb') as fd:
                     for chunk in item['chunks']:
-                        fd.write(zlib.decompress(self.store.get(NS_CHUNKS, chunk)))
+                        data = self.store.get(NS_CHUNKS, chunk)
+                        if hashlib.sha1(data).digest() != chunk[4:]:
+                            raise Exception('Invalid chunk checksum')
+                        fd.write(zlib.decompress(data))
 
     def process_dir(self, path, cache):
         path = path.lstrip('/\\:')
@@ -198,7 +201,7 @@ class Archiver(object):
             origsize = 0
             compsize = 0
             chunks = []
-            for chunk in chunker(fd, CHUNKSIZE, self.cache.summap):
+            for chunk in chunkify(fd, CHUNKSIZE, self.cache.summap):
                 origsize += len(chunk)
                 id, size = cache.add_chunk(chunk)
                 compsize += size

+ 6 - 6
dedupstore/chunker.py → dedupstore/chunkifier.py

@@ -28,25 +28,25 @@ def roll_checksum(sum, remove, add, len):
     return (s1 & 0xffff) + ((s2 & 0xffff) << 16)
 
 
-def chunker(fd, chunk_size, chunks):
+def chunkify(fd, chunk_size, chunks):
     """
     >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
-    >>> list(chunker(fd, 4, {}))
+    >>> list(chunkify(fd, 4, {}))
     ['ABCD', 'EFGH', 'IJ', 'KLMN']
     
     >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
     >>> chunks = {44564754: True} # 'BCDE'
-    >>> list(chunker(fd, 4, chunks))
+    >>> list(chunkify(fd, 4, chunks))
     ['A', 'BCDE', 'FGHI', 'J', 'KLMN']
 
     >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
     >>> chunks = {44564754: True, 48496938: True} # 'BCDE', 'HIJK'
-    >>> list(chunker(fd, 4, chunks))
+    >>> list(chunkify(fd, 4, chunks))
     ['A', 'BCDE', 'FG', 'HIJK', 'LMN']
 
     >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
     >>> chunks = {43909390: True, 50463030: True} # 'ABCD', 'KLMN'
-    >>> list(chunker(fd, 4, chunks))
+    >>> list(chunkify(fd, 4, chunks))
     ['ABCD', 'EFGH', 'IJ', 'KLMN']
     """
     data = 'X' + fd.read(chunk_size * 3)
@@ -62,7 +62,7 @@ def chunker(fd, chunk_size, chunks):
         if len(data) - i <= chunk_size:  # EOF?
             if len(data) > chunk_size + 1:
                 yield data[1:len(data) - chunk_size]
-                yield data[:chunk_size]
+                yield data[-chunk_size:]
             else:
                 yield data[1:]
             return