cache.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. import hashlib
  2. import os
  3. import msgpack
  4. import zlib
  5. NS_ARCHIVES = 'A'
  6. NS_CHUNKS = 'C'
  7. class Cache(object):
  8. """Client Side cache
  9. """
  10. def __init__(self, store):
  11. self.store = store
  12. self.path = os.path.join(os.path.expanduser('~'), '.dedupestore', 'cache',
  13. '%s.cache' % self.store.uuid)
  14. self.tid = -1
  15. self.open()
  16. if self.tid != self.store.tid:
  17. self.init()
  18. def open(self):
  19. if not os.path.exists(self.path):
  20. return
  21. data = open(self.path, 'rb').read()
  22. id = data[:32]
  23. data = data[32:]
  24. if hashlib.sha256(data).digest() != id:
  25. raise Exception('Cache hash did not match')
  26. data = msgpack.unpackb(zlib.decompress(data))
  27. if data['uuid'] != self.store.uuid:
  28. raise Exception('Cache UUID mismatch')
  29. self.chunkmap = data['chunkmap']
  30. self.archives = data['archives']
  31. self.tid = data['tid']
  32. def init(self):
  33. """Initializes cache by fetching and reading all archive indicies
  34. """
  35. self.chunkmap = {}
  36. self.archives = {}
  37. self.tid = self.store.tid
  38. if self.store.tid == 0:
  39. return
  40. for id in list(self.store.list(NS_ARCHIVES)):
  41. data = self.store.get(NS_ARCHIVES, id)
  42. if hashlib.sha256(data).digest() != id:
  43. raise Exception('Archive hash did not match')
  44. archive = msgpack.unpackb(zlib.decompress(data))
  45. self.archives[archive['name']] = id
  46. for item in archive['items']:
  47. if item['type'] != 'FILE':
  48. continue
  49. for idx in item['chunks']:
  50. id, size = archive['chunks'][idx]
  51. if self.seen_chunk(id):
  52. self.chunk_incref(id)
  53. else:
  54. self.init_chunk(id, size)
  55. self.save()
  56. def save(self):
  57. assert self.store.state == self.store.OPEN
  58. data = {'uuid': self.store.uuid,
  59. 'chunkmap': self.chunkmap,
  60. 'tid': self.store.tid, 'archives': self.archives}
  61. cachedir = os.path.dirname(self.path)
  62. if not os.path.exists(cachedir):
  63. os.makedirs(cachedir)
  64. with open(self.path, 'wb') as fd:
  65. data = zlib.compress(msgpack.packb(data))
  66. id = hashlib.sha256(data).digest()
  67. fd.write(id + data)
  68. def add_chunk(self, data):
  69. id = hashlib.sha256(data).digest()
  70. if self.seen_chunk(id):
  71. return self.chunk_incref(id)
  72. osize = len(data)
  73. data = zlib.compress(data)
  74. data = hashlib.sha256(data).digest() + data
  75. csize = len(data)
  76. self.store.put(NS_CHUNKS, id, data)
  77. return self.init_chunk(id, csize)
  78. def init_chunk(self, id, size):
  79. self.chunkmap[id] = (1, size)
  80. return id, size
  81. def seen_chunk(self, id):
  82. count, size = self.chunkmap.get(id, (0, 0))
  83. return count
  84. def chunk_incref(self, id):
  85. count, size = self.chunkmap[id]
  86. self.chunkmap[id] = (count + 1, size)
  87. return id, size
  88. def chunk_decref(self, id):
  89. count, size = self.chunkmap[id]
  90. if count == 1:
  91. del self.chunkmap[id]
  92. self.store.delete(NS_CHUNKS, id)
  93. else:
  94. self.chunkmap[id] = (count - 1, size)