#!/usr/bin/env python
# store.py
import binascii
import fcntl
import os
import shutil
import tempfile
import unittest
import uuid
  8. CHUNK_SIZE = 256 * 1024
  9. NS_ARCHIVES = 'ARCHIVES'
  10. NS_CHUNKS = 'CHUNKS'
  11. class Store(object):
  12. """
  13. """
  14. class DoesNotExist(KeyError):
  15. """"""
  16. class AlreadyExists(KeyError):
  17. """"""
  18. IDLE = 'Idle'
  19. OPEN = 'Open'
  20. ACTIVE = 'Active'
  21. VERSION = 'DEDUPESTORE VERSION 1'
  22. def __init__(self, path):
  23. self.tid = '-1'
  24. self.state = Store.IDLE
  25. if not os.path.exists(path):
  26. self.create(path)
  27. self.open(path)
  28. def create(self, path):
  29. os.mkdir(path)
  30. open(os.path.join(path, 'version'), 'wb').write(self.VERSION)
  31. open(os.path.join(path, 'uuid'), 'wb').write(str(uuid.uuid4()))
  32. open(os.path.join(path, 'tid'), 'wb').write('0')
  33. os.mkdir(os.path.join(path, 'data'))
  34. def open(self, path):
  35. self.path = path
  36. if not os.path.isdir(path):
  37. raise Exception('%s Does not look like a store')
  38. version_path = os.path.join(path, 'version')
  39. if not os.path.exists(version_path) or open(version_path, 'rb').read() != self.VERSION:
  40. raise Exception('%s Does not look like a store2')
  41. self.uuid = open(os.path.join(path, 'uuid'), 'rb').read()
  42. self.lock_fd = open(os.path.join(path, 'lock'), 'w')
  43. fcntl.flock(self.lock_fd, fcntl.LOCK_EX)
  44. self.tid = int(open(os.path.join(path, 'tid'), 'r').read())
  45. self.recover()
  46. def recover(self):
  47. if os.path.exists(os.path.join(self.path, 'txn-active')):
  48. shutil.rmtree(os.path.join(self.path, 'txn-active'))
  49. if os.path.exists(os.path.join(self.path, 'txn-commit')):
  50. self.apply_txn()
  51. if os.path.exists(os.path.join(self.path, 'txn-applied')):
  52. shutil.rmtree(os.path.join(self.path, 'txn-applied'))
  53. self.state = Store.OPEN
  54. self.txn_delete = []
  55. self.txn_write = []
  56. def close(self):
  57. self.recover()
  58. self.lock_fd.close()
  59. self.state = Store.IDLE
  60. def commit(self):
  61. """
  62. """
  63. if self.state == Store.OPEN:
  64. return
  65. assert self.state == Store.ACTIVE
  66. with open(os.path.join(self.path, 'txn-active', 'delete_index'), 'wb') as fd:
  67. fd.write('\n'.join(self.txn_delete))
  68. with open(os.path.join(self.path, 'txn-active', 'write_index'), 'wb') as fd:
  69. fd.write('\n'.join(self.txn_write))
  70. with open(os.path.join(self.path, 'txn-active', 'tid'), 'wb') as fd:
  71. fd.write(str(self.tid + 1))
  72. os.rename(os.path.join(self.path, 'txn-active'),
  73. os.path.join(self.path, 'txn-commit'))
  74. self.recover()
  75. def apply_txn(self):
  76. assert os.path.isdir(os.path.join(self.path, 'txn-commit'))
  77. tid = int(open(os.path.join(self.path, 'txn-commit', 'tid'), 'rb').read())
  78. assert tid == self.tid + 1
  79. delete_list = [line.strip() for line in
  80. open(os.path.join(self.path, 'txn-commit', 'delete_index'), 'rb').readlines()]
  81. for name in delete_list:
  82. path = os.path.join(self.path, 'data', name)
  83. os.unlink(path)
  84. write_list = [line.strip() for line in
  85. open(os.path.join(self.path, 'txn-commit', 'write_index'), 'rb').readlines()]
  86. for name in write_list:
  87. destname = os.path.join(self.path, 'data', name)
  88. if not os.path.exists(os.path.dirname(destname)):
  89. os.makedirs(os.path.dirname(destname))
  90. os.rename(os.path.join(self.path, 'txn-commit', 'write', name), destname)
  91. with open(os.path.join(self.path, 'tid'), 'wb') as fd:
  92. fd.write(str(tid))
  93. os.rename(os.path.join(self.path, 'txn-commit'),
  94. os.path.join(self.path, 'txn-applied'))
  95. shutil.rmtree(os.path.join(self.path, 'txn-applied'))
  96. self.tid = tid
  97. def rollback(self):
  98. """
  99. """
  100. self.recover()
  101. def prepare_txn(self):
  102. if self.state == Store.ACTIVE:
  103. return os.path.join(self.path, 'txn-active')
  104. elif self.state == Store.OPEN:
  105. os.mkdir(os.path.join(self.path, 'txn-active'))
  106. os.mkdir(os.path.join(self.path, 'txn-active', 'write'))
  107. self.state = Store.ACTIVE
  108. def _filename(self, ns, id, base=''):
  109. ns = ns.encode('hex')
  110. id = id.encode('hex')
  111. return os.path.join(base, ns, id[:2], id[2:4], id[4:])
  112. def get(self, ns, id):
  113. """
  114. """
  115. path = self._filename(ns, id)
  116. if path in self.txn_write:
  117. filename = os.path.join(self.path, 'txn-active', 'write', path)
  118. return open(filename, 'rb').read()
  119. if path in self.txn_delete:
  120. raise Store.DoesNotExist('Object %s:%s does not exist' % (ns.encode('hex'), id.encode('hex')))
  121. filename = self._filename(ns, id, os.path.join(self.path, 'data'))
  122. if os.path.exists(filename):
  123. return open(filename, 'rb').read()
  124. else:
  125. raise Store.DoesNotExist('Object %s:%s does not exist' % (ns.encode('hex'), id.encode('hex')))
  126. def put(self, ns, id, data):
  127. """
  128. """
  129. self.prepare_txn()
  130. path = self._filename(ns, id)
  131. filename = self._filename(ns, id, os.path.join(self.path, 'data'))
  132. if (path in self.txn_write or
  133. (path not in self.txn_delete and os.path.exists(filename))):
  134. raise Store.AlreadyExists('Object already exists: %s:%s' % (ns.encode('hex'), id.encode('hex')))
  135. if path in self.txn_delete:
  136. self.txn_delete.remove(path)
  137. if path not in self.txn_write:
  138. self.txn_write.append(path)
  139. filename = self._filename(ns, id, os.path.join(self.path, 'txn-active', 'write'))
  140. if not os.path.exists(os.path.dirname(filename)):
  141. os.makedirs(os.path.dirname(filename))
  142. with open(filename, 'wb') as fd:
  143. fd.write(data)
  144. def delete(self, ns, id):
  145. """
  146. """
  147. self.prepare_txn()
  148. path = self._filename(ns, id)
  149. if path in self.txn_write:
  150. filename = self._filename(ns, id, os.path.join(self.path, 'txn-active', 'write'))
  151. self.txn_write.remove(path)
  152. os.unlink(filename)
  153. else:
  154. filename = os.path.join(self.path, 'data', path)
  155. if os.path.exists(filename):
  156. self.txn_delete.append(path)
  157. else:
  158. raise Store.DoesNotExist('Object does not exist: %s' % hash.encode('hex'))
  159. def list(self, ns, prefix='', marker=None, max_keys=1000000):
  160. for x in self._walker(os.path.join(self.path, 'data', ns.encode('hex')),
  161. prefix, marker, '', max_keys):
  162. yield x
  163. def _walker(self, path, prefix, marker, base, max_keys):
  164. n = 0
  165. for name in sorted(os.listdir(path)):
  166. if n >= max_keys:
  167. return
  168. id = name.decode('hex')
  169. if os.path.isdir(os.path.join(path, name)):
  170. if prefix and not id.startswith(prefix[:len(id)]):
  171. continue
  172. for x in self.foo(os.path.join(path, name),
  173. prefix[len(id):], marker,
  174. base + id, max_keys - n):
  175. yield x
  176. n += 1
  177. else:
  178. if prefix and not id.startswith(prefix):
  179. continue
  180. if not marker or base + id >= marker:
  181. yield base + id
  182. n += 1
  183. class StoreTestCase(unittest.TestCase):
  184. def setUp(self):
  185. self.tmppath = tempfile.mkdtemp()
  186. self.store = Store(os.path.join(self.tmppath, 'store'))
  187. def tearDown(self):
  188. shutil.rmtree(self.tmppath)
  189. def test1(self):
  190. self.assertEqual(self.store.tid, 0)
  191. self.assertEqual(self.store.state, Store.OPEN)
  192. self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
  193. self.assertRaises(Store.AlreadyExists, lambda: self.store.put('SOMENS', 'SOMEID', 'SOMEDATA'))
  194. self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
  195. self.store.rollback()
  196. self.assertRaises(Store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
  197. self.assertEqual(self.store.tid, 0)
  198. def test2(self):
  199. self.assertEqual(self.store.tid, 0)
  200. self.assertEqual(self.store.state, Store.OPEN)
  201. self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
  202. self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
  203. self.store.commit()
  204. self.assertEqual(self.store.tid, 1)
  205. self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
  206. self.store.delete('SOMENS', 'SOMEID')
  207. self.assertRaises(Store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
  208. self.store.rollback()
  209. self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
  210. self.store.delete('SOMENS', 'SOMEID')
  211. self.assertRaises(Store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
  212. self.store.commit()
  213. self.assertEqual(self.store.tid, 2)
  214. self.assertRaises(Store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
  215. def test_list(self):
  216. self.store.put('SOMENS', 'SOMEID12', 'SOMEDATA')
  217. self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
  218. self.store.put('SOMENS', 'SOMEID1', 'SOMEDATA')
  219. self.store.put('SOMENS', 'SOMEID123', 'SOMEDATA')
  220. self.store.commit()
  221. self.assertEqual(list(self.store.list('SOMENS', max_keys=3)),
  222. ['SOMEID', 'SOMEID1', 'SOMEID12'])
  223. self.assertEqual(list(self.store.list('SOMENS', marker='SOMEID12')),
  224. ['SOMEID12', 'SOMEID123'])
  225. self.assertEqual(list(self.store.list('SOMENS', prefix='SOMEID1', max_keys=2)),
  226. ['SOMEID1', 'SOMEID12'])
  227. self.assertEqual(list(self.store.list('SOMENS', prefix='SOMEID1', marker='SOMEID12')),
  228. ['SOMEID12', 'SOMEID123'])
  229. if __name__ == '__main__':
  230. unittest.main()