cache.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. import cPickle
  2. import hashlib
  3. import os
  4. import sys
  5. import zlib
  6. from chunkifier import checksum
  7. from store import Store, NS_ARCHIVES, NS_CHUNKS
  8. class Cache(object):
  9. """Client Side cache
  10. """
  11. def __init__(self, store):
  12. self.store = store
  13. self.path = os.path.join(os.path.expanduser('~'), '.dedupestore', 'cache',
  14. '%s.cache' % self.store.uuid)
  15. self.tid = -1
  16. self.open()
  17. if self.tid != self.store.tid:
  18. self.init()
  19. def open(self):
  20. if not os.path.exists(self.path):
  21. return
  22. print 'Loading cache: ', self.path, '...'
  23. data = cPickle.loads(zlib.decompress(open(self.path, 'rb').read()))
  24. if data['uuid'] != self.store.uuid:
  25. print >> sys.stderr, 'Cache UUID mismatch'
  26. return
  27. self.chunkmap = data['chunkmap']
  28. self.summap = data['summap']
  29. self.archives = data['archives']
  30. self.tid = data['tid']
  31. print 'done'
  32. def init(self):
  33. """Initializes cache by fetching and reading all archive indicies
  34. """
  35. self.summap = {}
  36. self.chunkmap = {}
  37. self.archives = []
  38. self.tid = self.store.tid
  39. if self.store.tid == 0:
  40. return
  41. print 'Recreating cache...'
  42. for id in self.store.list(NS_ARCHIVES):
  43. archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, id)))
  44. self.archives.append(archive['name'])
  45. for id, sum, csize, osize in archive['chunks']:
  46. if self.seen_chunk(id):
  47. self.chunk_incref(id)
  48. else:
  49. self.init_chunk(id, sum, csize, osize)
  50. print 'done'
  51. def save(self):
  52. assert self.store.state == Store.OPEN
  53. print 'saving cache'
  54. data = {'uuid': self.store.uuid,
  55. 'chunkmap': self.chunkmap, 'summap': self.summap,
  56. 'tid': self.store.tid, 'archives': self.archives}
  57. print 'Saving cache as:', self.path
  58. cachedir = os.path.dirname(self.path)
  59. if not os.path.exists(cachedir):
  60. os.makedirs(cachedir)
  61. with open(self.path, 'wb') as fd:
  62. fd.write(zlib.compress(cPickle.dumps(data)))
  63. print 'done'
  64. def add_chunk(self, data):
  65. sum = checksum(data)
  66. osize = len(data)
  67. data = zlib.compress(data)
  68. id = hashlib.sha1(data).digest()
  69. if self.seen_chunk(id):
  70. return self.chunk_incref(id)
  71. csize = len(data)
  72. self.store.put(NS_CHUNKS, id, data)
  73. return self.init_chunk(id, sum, csize, osize)
  74. def init_chunk(self, id, sum, csize, osize):
  75. self.chunkmap[id] = (1, sum, osize, csize)
  76. self.summap[sum] = self.summap.get(sum, 0) + 1
  77. return id, sum, csize, osize
  78. def seen_chunk(self, id):
  79. count, sum, csize, osize = self.chunkmap.get(id, (0, 0, 0, 0))
  80. return count
  81. def chunk_incref(self, id):
  82. count, sum, csize, osize = self.chunkmap[id]
  83. self.chunkmap[id] = (count + 1, sum, osize, csize)
  84. self.summap[sum] += 1
  85. return id, sum, csize, osize
  86. def chunk_decref(self, id):
  87. count, sum, csize, osize = self.chunkmap[id]
  88. sumcount = self.summap[sum]
  89. if sumcount == 1:
  90. del self.summap[sum]
  91. else:
  92. self.summap[sum] = sumcount - 1
  93. if count == 1:
  94. del self.chunkmap[id]
  95. print 'deleting chunk: ', id.encode('hex')
  96. self.store.delete(NS_CHUNKS, id)
  97. else:
  98. self.chunkmap[id] = (count - 1, sum, csize, osize)