#!/usr/bin/env python
# bandstore.py
import binascii
import fcntl
import os
import shutil
import sqlite3
import tempfile
import unittest
import uuid
  9. class BandStore(object):
  10. """
  11. """
  12. class DoesNotExist(KeyError):
  13. """"""
  14. class AlreadyExists(KeyError):
  15. """"""
  16. IDLE = 'Idle'
  17. OPEN = 'Open'
  18. ACTIVE = 'Active'
  19. BAND_LIMIT = 10 * 1024 * 1024
  20. def __init__(self, path):
  21. if not os.path.exists(path):
  22. self.create(path)
  23. self.open(path)
  24. def open(self, path):
  25. if not os.path.isdir(path):
  26. raise Exception('%s Does not look like a store')
  27. db_path = os.path.join(path, 'dedupestore.db')
  28. if not os.path.exists(db_path):
  29. raise Exception('%s Does not look like a store2')
  30. self.lock_fd = open(os.path.join(path, 'lock'), 'w')
  31. fcntl.flock(self.lock_fd, fcntl.LOCK_EX)
  32. self.path = path
  33. self.cnx = sqlite3.connect(db_path)
  34. self.cursor = self.cnx.cursor()
  35. self._begin()
  36. def _begin(self):
  37. row = self.cursor.execute('SELECT uuid, tid, nextband, version, '
  38. 'bandlimit FROM system').fetchone()
  39. self.uuid, self.tid, self.nextband, version, self.bandlimit = row
  40. assert version == 1
  41. self.state = self.OPEN
  42. self.band = None
  43. self.to_delete = set()
  44. band = self.nextband
  45. while os.path.exists(self.band_filename(band)):
  46. os.unlink(self.band_filename(band))
  47. band += 1
  48. def create(self, path):
  49. os.mkdir(path)
  50. os.mkdir(os.path.join(path, 'bands'))
  51. cnx = sqlite3.connect(os.path.join(path, 'dedupestore.db'))
  52. cnx.execute('CREATE TABLE objects(ns TEXT NOT NULL, id NOT NULL, '
  53. 'band NOT NULL, offset NOT NULL, size NOT NULL)')
  54. cnx.execute('CREATE TABLE system(uuid NOT NULL, tid NOT NULL, '
  55. 'nextband NOT NULL, version NOT NULL, bandlimit NOT NULL)')
  56. cnx.execute('INSERT INTO system VALUES(?,?,?,?,?)',
  57. (uuid.uuid1().hex, 0, 0, 1, self.BAND_LIMIT))
  58. cnx.execute('CREATE UNIQUE INDEX objects_pk ON objects(ns, id)')
  59. def close(self):
  60. self.cnx.close()
  61. self.lock_fd.close()
  62. os.unlink(os.path.join(self.path, 'lock'))
  63. def commit(self):
  64. """
  65. """
  66. self.band = None
  67. for b in self.to_delete:
  68. objects = self.cursor.execute('SELECT ns, id, offset, size '
  69. 'FROM objects WHERE band=?', (b,)).fetchall()
  70. for o in objects:
  71. band, offset, size = self.store_data(self.retrieve_data(b, *o[2:]))
  72. self.cursor.execute('UPDATE objects SET band=?, offset=?, size=? '
  73. 'WHERE ns=? AND id=?', (band, offset, size, o[0], o[1]))
  74. os.unlink(os.path.join(self.path, 'bands', str(b)))
  75. self.cursor.execute('UPDATE system SET tid=tid+1, nextband=?',
  76. (self.nextband,))
  77. self.cnx.commit()
  78. self.tid += 1
  79. self._begin()
  80. def rollback(self):
  81. """
  82. """
  83. self.cnx.rollback()
  84. self._begin()
  85. def get(self, ns, id):
  86. """
  87. """
  88. self.cursor.execute('SELECT band, offset, size FROM objects WHERE ns=? and id=?',
  89. (ns.encode('hex'), id.encode('hex')))
  90. row = self.cursor.fetchone()
  91. if row:
  92. return self.retrieve_data(*row)
  93. else:
  94. raise self.DoesNotExist
  95. def band_filename(self, band):
  96. return os.path.join(self.path, 'bands', str(band))
  97. def retrieve_data(self, band, offset, size):
  98. with open(self.band_filename(band), 'rb') as fd:
  99. fd.seek(offset)
  100. return fd.read(size)
  101. def store_data(self, data):
  102. if self.band is None:
  103. self.band = self.nextband
  104. assert not os.path.exists(self.band_filename(self.band))
  105. self.nextband += 1
  106. band = self.band
  107. with open(self.band_filename(band), 'ab') as fd:
  108. offset = fd.tell()
  109. fd.write(data)
  110. if offset + len(data) > self.bandlimit:
  111. self.band = None
  112. return band, offset, len(data)
  113. def put(self, ns, id, data):
  114. """
  115. """
  116. try:
  117. band, offset, size = self.store_data(data)
  118. self.cursor.execute('INSERT INTO objects (ns, id, band, offset, size) '
  119. 'VALUES(?, ?, ?, ?, ?)',
  120. (ns.encode('hex'), id.encode('hex'),
  121. band, offset, size))
  122. except sqlite3.IntegrityError:
  123. raise self.AlreadyExists
  124. def delete(self, ns, id):
  125. """
  126. """
  127. self.cursor.execute('SELECT band FROM objects WHERE ns=? and id=?',
  128. (ns.encode('hex'), id.encode('hex')))
  129. row = self.cursor.fetchone()
  130. if not row:
  131. raise self.DoesNotExist
  132. self.cursor.execute('DELETE FROM objects WHERE ns=? AND id=?',
  133. (ns.encode('hex'), id.encode('hex')))
  134. self.to_delete.add(row[0])
  135. def list(self, ns, prefix='', marker=None, max_keys=1000000):
  136. """
  137. """
  138. condition = ''
  139. if prefix:
  140. condition += ' AND id LIKE :prefix'
  141. if marker:
  142. condition += ' AND id >= :marker'
  143. args = dict(ns=ns.encode('hex'), prefix=prefix.encode('hex') + '%',
  144. marker=marker and marker.encode('hex'))
  145. for row in self.cursor.execute('SELECT id FROM objects WHERE '
  146. 'ns=:ns ' + condition + ' LIMIT ' + str(max_keys),
  147. args):
  148. yield row[0].decode('hex')
  149. class BandStoreTestCase(unittest.TestCase):
  150. def setUp(self):
  151. self.tmppath = tempfile.mkdtemp()
  152. self.store = BandStore(os.path.join(self.tmppath, 'store'))
  153. def tearDown(self):
  154. shutil.rmtree(self.tmppath)
  155. def test1(self):
  156. self.assertEqual(self.store.tid, 0)
  157. self.assertEqual(self.store.state, self.store.OPEN)
  158. self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
  159. self.assertRaises(self.store.AlreadyExists, lambda: self.store.put('SOMENS', 'SOMEID', 'SOMEDATA'))
  160. self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
  161. self.store.rollback()
  162. self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
  163. self.assertEqual(self.store.tid, 0)
  164. def test2(self):
  165. self.assertEqual(self.store.tid, 0)
  166. self.assertEqual(self.store.state, self.store.OPEN)
  167. self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
  168. self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
  169. self.store.commit()
  170. self.assertEqual(self.store.tid, 1)
  171. self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
  172. self.store.delete('SOMENS', 'SOMEID')
  173. self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
  174. self.store.rollback()
  175. self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
  176. self.store.delete('SOMENS', 'SOMEID')
  177. self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
  178. self.store.commit()
  179. self.assertEqual(self.store.tid, 2)
  180. self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
  181. def test_list(self):
  182. self.store.put('SOMENS', 'SOMEID12', 'SOMEDATA')
  183. self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
  184. self.store.put('SOMENS', 'SOMEID1', 'SOMEDATA')
  185. self.store.put('SOMENS', 'SOMEID123', 'SOMEDATA')
  186. self.store.commit()
  187. self.assertEqual(list(self.store.list('SOMENS', max_keys=3)),
  188. ['SOMEID', 'SOMEID1', 'SOMEID12'])
  189. self.assertEqual(list(self.store.list('SOMENS', marker='SOMEID12')),
  190. ['SOMEID12', 'SOMEID123'])
  191. self.assertEqual(list(self.store.list('SOMENS', prefix='SOMEID1', max_keys=2)),
  192. ['SOMEID1', 'SOMEID12'])
  193. self.assertEqual(list(self.store.list('SOMENS', prefix='SOMEID1', marker='SOMEID12')),
  194. ['SOMEID12', 'SOMEID123'])
  195. if __name__ == '__main__':
  196. unittest.main()