|
@@ -0,0 +1,83 @@
|
|
|
def checksum(data, sum=0):
    """Compute a 32-bit, Adler-like checksum of *data*.

    Two running totals are kept: the low one accumulates ``ord(c) + 1``
    for every character, the high one accumulates every intermediate
    value of the low total.  Both are truncated to 16 bits and packed
    into a single integer (high total in the upper half).

    *sum* is a checksum previously returned by this function; passing it
    continues the computation, so a string can be checksummed piecewise.

    >>> checksum('FOOBAR')
    102367679
    >>> checksum('FOOBAR') == checksum('BAR', checksum('FOO'))
    True
    """
    # Unpack the two totals from the seed value.
    low = sum & 0xffff
    high = sum >> 16
    for char in data:
        low = low + ord(char) + 1
        high = high + low
    # The halves occupy disjoint bit ranges, so OR-ing packs them.
    return (low & 0xffff) | ((high & 0xffff) << 16)
|
|
|
+
|
|
|
+
|
|
|
def roll_checksum(sum, remove, add, len):
    """Slide a checksummed window forward by one character.

    *sum* is the checksum() of a window of *len* characters whose first
    character is *remove*.  The result is the checksum of the window
    obtained by dropping *remove* from the front and appending *add* at
    the end, computed without re-reading the window.

    >>> roll_checksum(checksum('XFOOBA'), 'X', 'R', 6) == checksum('FOOBAR')
    True
    """
    low = sum & 0xffff
    high = sum >> 16
    incoming = ord(add)
    outgoing = ord(remove)
    # Swap the outgoing character for the incoming one in the low total.
    low = low - outgoing + incoming
    # The outgoing character was counted (plus one) in each of the *len*
    # partial sums; drop those and add the partial sum of the new window.
    high = high - len * (outgoing + 1) + low
    return ((high & 0xffff) << 16) | (low & 0xffff)
|
|
|
+
|
|
|
+
|
|
|
def chunker(fd, chunk_size, chunks):
    """Yield the contents of *fd* as a sequence of chunks.

    A window of *chunk_size* characters is rolled over the stream.  When
    the checksum of the window is found in *chunks* (any container that
    supports ``in`` on checksum() values), the data preceding the window
    is yielded first (if any) and then the window itself.  When no match
    is found for *chunk_size* consecutive positions, a plain
    *chunk_size* sized chunk is yielded.

    At end of input the last *chunk_size* characters are always yielded
    as one chunk, preceded by whatever shorter remainder is left before
    them; if less than a full chunk remains it is yielded as is (an
    empty input therefore yields a single empty string).

    *fd* only needs a ``read(n)`` method.  A read that returns less than
    requested is treated as end of input.

    Raises ValueError (on first iteration) if *chunk_size* is smaller
    than 1.

    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
    >>> list(chunker(fd, 4, {}))
    ['ABCD', 'EFGH', 'IJ', 'KLMN']

    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
    >>> chunks = {44564754: True} # 'BCDE'
    >>> list(chunker(fd, 4, chunks))
    ['A', 'BCDE', 'FGHI', 'J', 'KLMN']

    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
    >>> chunks = {44564754: True, 48496938: True} # 'BCDE', 'HIJK'
    >>> list(chunker(fd, 4, chunks))
    ['A', 'BCDE', 'FG', 'HIJK', 'LMN']

    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
    >>> chunks = {43909390: True, 50463030: True} # 'ABCD', 'KLMN'
    >>> list(chunker(fd, 4, chunks))
    ['ABCD', 'EFGH', 'IJ', 'KLMN']

    A second known chunk is still found when the first match leaves
    fewer than *chunk_size* characters in the buffer:

    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMNOPQRST')
    >>> chunks = {checksum('CDEFG'): True, checksum('JKLMN'): True}
    >>> list(chunker(fd, 5, chunks))
    ['AB', 'CDEFG', 'HI', 'JKLMN', 'O', 'PQRST']
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer')
    read_size = chunk_size * 2
    # data[0] is a placeholder character preceding the unconsumed data, so
    # that the first real window can be reached with a regular roll step.
    data = 'X' + fd.read(read_size)
    # i is the start of the window about to be tested; window_sum always
    # holds the checksum of the window starting one position earlier.
    i = 1
    window_sum = checksum(data[:chunk_size])
    while True:
        # Top up the buffer before the window gets close to its end.
        if len(data) - i - 2 <= chunk_size:
            data += fd.read(read_size)
        # A full chunk went by without a match: emit it as a plain chunk.
        # Its last character stays behind as the new placeholder.
        if i == chunk_size + 1:
            yield data[1:chunk_size + 1]
            i = 1
            data = data[chunk_size:]
        if len(data) - i <= chunk_size:  # EOF?
            if len(data) > chunk_size + 1:
                yield data[1:len(data) - chunk_size]
                yield data[-chunk_size:]
            else:
                yield data[1:]
            return
        window_sum = roll_checksum(window_sum, data[i - 1],
                                   data[i - 1 + chunk_size], chunk_size)
        if window_sum in chunks:
            if i > 1:
                yield data[1:i]
            yield data[i:i + chunk_size]
            # Keep the last character of the matched chunk as placeholder.
            data = data[i + chunk_size - 1:]
            # Refill *before* re-seeding the checksum: the seed must cover
            # a full window, otherwise every later rolled value is wrong
            # and known chunks further down the stream are never matched.
            if len(data) < chunk_size:
                data += fd.read(read_size)
            i = 0
            window_sum = checksum(data[:chunk_size])
        i += 1
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Self-test: run the doctests embedded in this module's docstrings.
    import doctest
    # Not used directly here; the chunker doctests look up StringIO in
    # the module namespace.
    import StringIO
    doctest.testmod()
|