@@ -2,35 +2,34 @@ import os
 import sys
 import hashlib
 import zlib
+import cPickle
 from repository import Repository
 
 CHUNKSIZE = 256 * 1024
 
-class FileItem(object):
-
-    def __init__(self):
-        """"""
-
-    def process_file(self, filename, cache):
-        self.filename = filename
-        fd = open(filename, 'rb')
-        self.size = 0
-        self.chunks = []
-        while True:
-            data = fd.read(CHUNKSIZE)
-            if not data:
-                break
-            self.size += len(data)
-            self.chunks.append(cache.add_chunk(zlib.compress(data)))
-        print '%s: %d chunks' % (filename, len(self.chunks))
-
 
 class Cache(object):
     """Client Side cache
     """
-    def __init__(self, repo):
+    def __init__(self, path, repo):
        self.repo = repo
        self.chunkmap = {}
+        self.archives = []
+        self.open(path)
+
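+    # Populate self.archives and rebuild chunk reference counts by
+    # re-reading every archive already stored in the repository, so
+    # the in-memory chunkmap reflects the data that is already there.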
+    def open(self, path):
+        for archive in self.repo.listdir('archives'):
+            self.archives.append(archive)
+            data = self.repo.get_file(os.path.join('archives', archive))
+            a = cPickle.loads(zlib.decompress(data))
+            for item in a['items']:
+                if item['type'] == 'FILE':
+                    for c in item['chunks']:
+                        print 'adding chunk', c.encode('hex')
+                        self.chunk_incref(c)
+
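+    # save() is only a stub at this point: it asserts that the
+    # repository is still open but writes no cache state back yet.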
+    def save(self):
+        assert self.repo.state == Repository.OPEN
 
     def chunk_filename(self, sha):
         hex = sha.encode('hex')
@@ -57,36 +56,51 @@ class Cache(object):
         self.chunkmap[sha] -= 1
         return self.chunkmap[sha]
 
-
-class Archive(object):
-    """
-    """
-    def __init__(self):
-        self.items = []
-
-    def add_item(self, item):
-        self.items.append(item)
-
-
 class Archiver(object):
 
     def __init__(self):
-        self.cache = Cache(Repository('/tmp/repo'))
-        self.archive = Archive()
+        self.repo = Repository('/tmp/repo')
+        self.cache = Cache('/tmp/cache', self.repo)
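+        # both the repository and cache locations are hard-coded for now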
 
-    def run(self, path):
+    def create_archive(self, archive_name, path):
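+        # Walk the tree, turn each directory and file into a dict item,
+        # then pickle and compress the item list and store the result in
+        # the repository under 'archives/<archive_name>'.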
+        if archive_name in self.cache.archives:
+            raise Exception('Archive "%s" already exists' % archive_name)
+        items = []
         for root, dirs, files in os.walk(path):
+            for d in dirs:
+                name = os.path.join(root, d)
+                items.append(self.process_dir(name, self.cache))
             for f in files:
-                filename = os.path.join(root, f)
-                item = FileItem()
-                item.process_file(filename, self.cache)
-                self.archive.add_item(item)
-        self.cache.repo.commit()
+                name = os.path.join(root, f)
+                items.append(self.process_file(name, self.cache))
+        archive = {'name': archive_name, 'items': items}
+        zdata = zlib.compress(cPickle.dumps(archive))
+        self.repo.put_file(os.path.join('archives', archive_name), zdata)
+        print 'Archive file size: %d' % len(zdata)
+        self.repo.commit()
+        self.cache.save()
+
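+    # Directories carry no chunk data; only their path is recorded.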
+    def process_dir(self, path, cache):
+        print 'Directory: %s' % (path)
+        return {'type': 'DIR', 'path': path}
+
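+    # Read the file in CHUNKSIZE blocks; each block is compressed and
+    # passed to cache.add_chunk(), whose return value identifies the
+    # stored chunk in the item's chunk list.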
+    def process_file(self, path, cache):
+        fd = open(path, 'rb')
+        size = 0
+        chunks = []
+        while True:
+            data = fd.read(CHUNKSIZE)
+            if not data:
+                break
+            size += len(data)
+            chunks.append(cache.add_chunk(zlib.compress(data)))
+        fd.close()
+        print 'File: %s (%d chunks)' % (path, len(chunks))
+        return {'type': 'FILE', 'path': path, 'size': size, 'chunks': chunks}
 
 
 def main():
     archiver = Archiver()
-    archiver.run(sys.argv[1])
+    archiver.create_archive(sys.argv[1], sys.argv[2])
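+    # usage: archiver.py <archive-name> <path-to-archive>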
 
 if __name__ == '__main__':
     main()