bandstore.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. #!/usr/bin/env python
import binascii
import fcntl
import os
import shutil
import sqlite3
import tempfile
import unittest
import uuid
  9. class BandStore(object):
  10. """
  11. """
  12. class DoesNotExist(KeyError):
  13. """"""
  14. class AlreadyExists(KeyError):
  15. """"""
  16. IDLE = 'Idle'
  17. OPEN = 'Open'
  18. ACTIVE = 'Active'
  19. BAND_LIMIT = 1 * 1024 * 1024
  20. def __init__(self, path):
  21. self.read_fd = None
  22. self.write_fd = None
  23. if not os.path.exists(path):
  24. self.create(path)
  25. self.open(path)
  26. def open(self, path):
  27. if not os.path.isdir(path):
  28. raise Exception('%s Does not look like a store')
  29. db_path = os.path.join(path, 'dedupestore.db')
  30. if not os.path.exists(db_path):
  31. raise Exception('%s Does not look like a store2')
  32. self.lock_fd = open(os.path.join(path, 'lock'), 'w')
  33. fcntl.flock(self.lock_fd, fcntl.LOCK_EX)
  34. self.path = path
  35. self.cnx = sqlite3.connect(db_path)
  36. self.cursor = self.cnx.cursor()
  37. self._begin()
  38. def _begin(self):
  39. if self.read_fd:
  40. self.read_fd.close()
  41. self.read_fd = None
  42. if self.write_fd:
  43. self.write_fd.close()
  44. self.write_fd = None
  45. row = self.cursor.execute('SELECT uuid, tid, nextband, version, '
  46. 'bandlimit FROM system').fetchone()
  47. self.uuid, self.tid, self.nextband, version, self.bandlimit = row
  48. assert version == 1
  49. self.state = self.OPEN
  50. self.read_band = None
  51. self.write_band = None
  52. self.to_delete = set()
  53. band = self.nextband
  54. while os.path.exists(self.band_filename(band)):
  55. os.unlink(self.band_filename(band))
  56. band += 1
  57. self.delete_bands()
  58. def create(self, path):
  59. os.mkdir(path)
  60. os.mkdir(os.path.join(path, 'bands'))
  61. cnx = sqlite3.connect(os.path.join(path, 'dedupestore.db'))
  62. cnx.execute('CREATE TABLE objects(ns TEXT NOT NULL, id NOT NULL, '
  63. 'band NOT NULL, offset NOT NULL, size NOT NULL)')
  64. cnx.execute('CREATE UNIQUE INDEX objects_pk ON objects(ns, id)')
  65. cnx.execute('CREATE TABLE to_delete(band NOT NULL)')
  66. cnx.execute('CREATE UNIQUE INDEX to_delete_pk ON to_delete(band)')
  67. cnx.execute('CREATE TABLE system(uuid NOT NULL, tid NOT NULL, '
  68. 'nextband NOT NULL, version NOT NULL, bandlimit NOT NULL)')
  69. cnx.execute('INSERT INTO system VALUES(?,?,?,?,?)',
  70. (uuid.uuid1().hex, 0, 0, 1, self.BAND_LIMIT))
  71. cnx.commit()
  72. def close(self):
  73. self.rollback()
  74. self.cnx.close()
  75. self.lock_fd.close()
  76. os.unlink(os.path.join(self.path, 'lock'))
  77. def commit(self):
  78. """
  79. """
  80. self.band = None
  81. self.cursor.executemany('INSERT INTO to_delete(band) VALUES(?)',
  82. [[d] for d in self.to_delete])
  83. self.cursor.execute('UPDATE system SET tid=tid+1, nextband=?',
  84. (self.nextband,))
  85. self.cnx.commit()
  86. self.tid += 1
  87. self._begin()
  88. def delete_bands(self):
  89. self.cursor.execute('SELECT band FROM to_delete')
  90. to_delete = [r[0] for r in self.cursor.fetchall()]
  91. for b in to_delete:
  92. objects = self.cursor.execute('SELECT ns, id, offset, size '
  93. 'FROM objects WHERE band=? ORDER BY offset',
  94. (b,)).fetchall()
  95. for o in objects:
  96. band, offset, size = self.store_data(self.retrieve_data(b, *o[2:]))
  97. self.cursor.execute('UPDATE objects SET band=?, offset=?, size=? '
  98. 'WHERE ns=? AND id=?', (band, offset, size, o[0], o[1]))
  99. self.cursor.execute('DELETE FROM to_delete WHERE band=?', (b,))
  100. self.cursor.execute('UPDATE system SET nextband=?', (self.nextband,))
  101. self.cnx.commit()
  102. os.unlink(self.band_filename(b))
  103. def rollback(self):
  104. """
  105. """
  106. self.cnx.rollback()
  107. self._begin()
  108. def get(self, ns, id):
  109. """
  110. """
  111. self.cursor.execute('SELECT band, offset, size FROM objects WHERE ns=? and id=?',
  112. (ns.encode('hex'), id.encode('hex')))
  113. row = self.cursor.fetchone()
  114. if row:
  115. return self.retrieve_data(*row)
  116. else:
  117. raise self.DoesNotExist
  118. def band_filename(self, band):
  119. return os.path.join(self.path, 'bands', str(band / 1000), str(band))
  120. def retrieve_data(self, band, offset, size):
  121. if self.read_band != band:
  122. self.read_band = band
  123. if self.read_fd:
  124. self.read_fd.close()
  125. self.read_fd = open(self.band_filename(band), 'rb')
  126. self.read_fd.seek(offset)
  127. return self.read_fd.read(size)
  128. def store_data(self, data):
  129. if self.write_band is None:
  130. self.write_band = self.nextband
  131. self.nextband += 1
  132. if self.write_band % 1000 == 0:
  133. os.mkdir(os.path.join(self.path, 'bands', str(self.write_band / 1000)))
  134. assert not os.path.exists(self.band_filename(self.write_band))
  135. self.write_fd = open(self.band_filename(self.write_band), 'ab')
  136. band = self.write_band
  137. offset = self.write_fd.tell()
  138. self.write_fd.write(data)
  139. if offset + len(data) > self.bandlimit:
  140. self.write_band = None
  141. return band, offset, len(data)
  142. def put(self, ns, id, data):
  143. """
  144. """
  145. try:
  146. band, offset, size = self.store_data(data)
  147. self.cursor.execute('INSERT INTO objects (ns, id, band, offset, size) '
  148. 'VALUES(?, ?, ?, ?, ?)',
  149. (ns.encode('hex'), id.encode('hex'),
  150. band, offset, size))
  151. except sqlite3.IntegrityError:
  152. raise self.AlreadyExists
  153. def delete(self, ns, id):
  154. """
  155. """
  156. self.cursor.execute('SELECT band FROM objects WHERE ns=? and id=?',
  157. (ns.encode('hex'), id.encode('hex')))
  158. row = self.cursor.fetchone()
  159. if not row:
  160. raise self.DoesNotExist
  161. self.cursor.execute('DELETE FROM objects WHERE ns=? AND id=?',
  162. (ns.encode('hex'), id.encode('hex')))
  163. self.to_delete.add(row[0])
  164. def list(self, ns, prefix='', marker=None, max_keys=1000000):
  165. """
  166. """
  167. condition = ''
  168. if prefix:
  169. condition += ' AND id LIKE :prefix'
  170. if marker:
  171. condition += ' AND id >= :marker'
  172. args = dict(ns=ns.encode('hex'), prefix=prefix.encode('hex') + '%',
  173. marker=marker and marker.encode('hex'))
  174. for row in self.cursor.execute('SELECT id FROM objects WHERE '
  175. 'ns=:ns ' + condition + ' LIMIT ' + str(max_keys),
  176. args):
  177. yield row[0].decode('hex')
  178. class BandStoreTestCase(unittest.TestCase):
  179. def setUp(self):
  180. self.tmppath = tempfile.mkdtemp()
  181. self.store = BandStore(os.path.join(self.tmppath, 'store'))
  182. def tearDown(self):
  183. shutil.rmtree(self.tmppath)
  184. def test1(self):
  185. self.assertEqual(self.store.tid, 0)
  186. self.assertEqual(self.store.state, self.store.OPEN)
  187. self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
  188. self.assertRaises(self.store.AlreadyExists, lambda: self.store.put('SOMENS', 'SOMEID', 'SOMEDATA'))
  189. self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
  190. self.store.rollback()
  191. self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
  192. self.assertEqual(self.store.tid, 0)
  193. def test2(self):
  194. self.assertEqual(self.store.tid, 0)
  195. self.assertEqual(self.store.state, self.store.OPEN)
  196. self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
  197. self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
  198. self.store.commit()
  199. self.assertEqual(self.store.tid, 1)
  200. self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
  201. self.store.delete('SOMENS', 'SOMEID')
  202. self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
  203. self.store.rollback()
  204. self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
  205. self.store.delete('SOMENS', 'SOMEID')
  206. self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
  207. self.store.commit()
  208. self.assertEqual(self.store.tid, 2)
  209. self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
  210. def test_list(self):
  211. self.store.put('SOMENS', 'SOMEID12', 'SOMEDATA')
  212. self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
  213. self.store.put('SOMENS', 'SOMEID1', 'SOMEDATA')
  214. self.store.put('SOMENS', 'SOMEID123', 'SOMEDATA')
  215. self.store.commit()
  216. self.assertEqual(list(self.store.list('SOMENS', max_keys=3)),
  217. ['SOMEID', 'SOMEID1', 'SOMEID12'])
  218. self.assertEqual(list(self.store.list('SOMENS', marker='SOMEID12')),
  219. ['SOMEID12', 'SOMEID123'])
  220. self.assertEqual(list(self.store.list('SOMENS', prefix='SOMEID1', max_keys=2)),
  221. ['SOMEID1', 'SOMEID12'])
  222. self.assertEqual(list(self.store.list('SOMENS', prefix='SOMEID1', marker='SOMEID12')),
  223. ['SOMEID12', 'SOMEID123'])
  224. if __name__ == '__main__':
  225. unittest.main()