1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- import os
- import sys
- import hashlib
- import zlib
- from repository import Repository
# Number of bytes read from a file per chunk (256 KiB).
CHUNKSIZE = 256 * 1024


class FileItem(object):
    """One archived file, recorded as an ordered list of chunk ids.

    Attributes are filled in by :meth:`process_file`:

    * ``filename`` -- the path that was processed (``None`` until then)
    * ``size``     -- total number of uncompressed bytes read
    * ``chunks``   -- the values returned by ``cache.add_chunk`` for each
      chunk, in file order
    """

    def __init__(self):
        """Create an empty item; no file is touched until process_file()."""
        self.filename = None
        self.size = 0
        self.chunks = []

    def process_file(self, filename, cache):
        """Read *filename* in CHUNKSIZE pieces and store each via *cache*.

        Every piece is zlib-compressed and handed to ``cache.add_chunk``;
        whatever that call returns is appended to ``self.chunks``.

        :param filename: path of the file to read (opened in binary mode)
        :param cache: object providing ``add_chunk(data)``
        :raises IOError: (``OSError`` on Python 3) if the file cannot be
            opened or read
        """
        self.filename = filename
        self.size = 0
        self.chunks = []
        # The context manager guarantees the handle is closed even when
        # reading, compressing, or the cache raises part-way through.
        with open(filename, 'rb') as fd:
            # iter() with a sentinel stops at the first empty read (EOF).
            for data in iter(lambda: fd.read(CHUNKSIZE), b''):
                self.size += len(data)
                self.chunks.append(cache.add_chunk(zlib.compress(data)))
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print('%s: %d chunks' % (filename, len(self.chunks)))
class Cache(object):
    """Client-side cache of per-chunk reference counts.

    ``chunkmap`` maps a chunk's binary SHA-1 digest to the number of
    references recorded for it.  A chunk is written to ``repo`` only when
    its current count is zero (or it has never been seen).
    """

    def __init__(self, repo):
        """Bind the cache to *repo*, which must provide ``put_file(name, data)``."""
        self.repo = repo
        self.chunkmap = {}

    @staticmethod
    def _hex(sha):
        """Return the lowercase hexadecimal text for the binary digest *sha*."""
        # bytearray() yields integers on both Python 2 and Python 3, so this
        # avoids str.encode('hex'), which does not exist for Python 3 bytes.
        return ''.join('%02x' % byte for byte in bytearray(sha))

    def chunk_filename(self, sha):
        """Return the repository path for *sha*: ``chunks/xx/yy/<rest>``.

        The first two pairs of hex digits become directory levels.
        """
        digest = self._hex(sha)
        return 'chunks/%s/%s/%s' % (digest[:2], digest[2:4], digest[4:])

    def add_chunk(self, data):
        """Store *data* if it is not currently referenced and count one reference.

        :param data: the chunk's bytes (hashed with SHA-1 as given)
        :returns: the binary SHA-1 digest identifying the chunk
        """
        sha = hashlib.sha1(data).digest()
        if self.seen_chunk(sha):
            print('seen chunk %s' % self._hex(sha))
        else:
            self.repo.put_file(self.chunk_filename(sha), data)
        self.chunk_incref(sha)
        return sha

    def seen_chunk(self, sha):
        """Return True if *sha* currently has a positive reference count."""
        return self.chunkmap.get(sha, 0) > 0

    def chunk_incref(self, sha):
        """Increase the reference count of *sha* by one (starting from zero)."""
        self.chunkmap[sha] = self.chunkmap.get(sha, 0) + 1

    def chunk_decref(self, sha):
        """Decrease the reference count of *sha* and return the new count.

        :raises AssertionError: if *sha* has no positive reference count
        """
        count = self.chunkmap.get(sha, 0)
        if count <= 0:
            # Raised explicitly instead of via ``assert`` so the check still
            # runs under ``python -O``; the exception type is unchanged.
            raise AssertionError('chunk %s is not referenced' % self._hex(sha))
        self.chunkmap[sha] = count - 1
        return self.chunkmap[sha]
class Archive(object):
    """Plain container holding the items added to one archive, in order."""

    def __init__(self):
        """Start with an empty ``items`` list."""
        self.items = list()

    def add_item(self, item):
        """Append *item* to the end of ``items``."""
        self.items.extend((item,))
class Archiver(object):
    """Walk a directory tree and record every file found in an Archive.

    ``cache`` is a Cache wrapping a Repository opened at *repo_path*;
    ``archive`` collects one FileItem per processed file.
    """

    def __init__(self, repo_path='/tmp/repo'):
        """Set up the cache and an empty archive.

        :param repo_path: location passed to ``Repository``; defaults to
            the previously hard-coded ``'/tmp/repo'``
        """
        self.cache = Cache(Repository(repo_path))
        self.archive = Archive()

    def run(self, path):
        """Process every file name that ``os.walk(path)`` reports.

        Each file is chunked through ``self.cache`` and its FileItem is
        added to ``self.archive``.  The repository is committed once, after
        the whole tree has been walked.

        :param path: root directory to archive
        """
        for root, _dirs, files in os.walk(path):
            for name in files:
                item = FileItem()
                item.process_file(os.path.join(root, name), self.cache)
                self.archive.add_item(item)
        # NOTE(review): source indentation was lost; the commit is assumed
        # to run once after the walk rather than per file or directory.
        self.cache.repo.commit()
def main(argv=None):
    """Command-line entry point: archive the directory named by the first argument.

    :param argv: argument list without the program name; defaults to
        ``sys.argv[1:]`` when omitted
    :raises SystemExit: with status 2 when no path argument is given
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # A missing argument used to surface as a bare IndexError traceback;
        # report the expected usage instead.
        sys.stderr.write('usage: %s PATH\n' % sys.argv[0])
        sys.exit(2)
    archiver = Archiver()
    archiver.run(args[0])


if __name__ == '__main__':
    main()
|