Browse Source

An experimental sqlite based storage implementation.

Jonas Borgström 15 years ago
parent
commit
62a4a37544
4 changed files with 172 additions and 13 deletions
  1. 6 3
      dedupestore/archiver.py
  2. 8 4
      dedupestore/cache.py
  3. 1 6
      dedupestore/fsstore.py
  4. 157 0
      dedupestore/sqlitestore.py

+ 6 - 3
dedupestore/archiver.py

@@ -5,8 +5,11 @@ import cPickle
 from optparse import OptionParser
 
 from chunkifier import chunkify
-from cache import Cache
-from store import Store, NS_ARCHIVES, NS_CHUNKS, CHUNK_SIZE
+from cache import Cache, NS_ARCHIVES, NS_CHUNKS
+from sqlitestore import SqliteStore
+
+
+CHUNK_SIZE = 256 * 1024
 
 
 class Archive(object):
@@ -202,7 +205,7 @@ class Archiver(object):
                         help="Display archive statistics", metavar="ARCHIVE")
         (options, args) = parser.parse_args()
         if options.store:
-            self.store = Store(options.store)
+            self.store = SqliteStore(options.store)
         else:
             parser.error('No store path specified')
         self.cache = Cache(self.store)

+ 8 - 4
dedupestore/cache.py

@@ -5,7 +5,9 @@ import sys
 import zlib
 
 from chunkifier import checksum
-from store import Store, NS_ARCHIVES, NS_CHUNKS
+
+NS_ARCHIVES = 'ARCHIVES'
+NS_CHUNKS = 'CHUNKS'
 
 
 class Cache(object):
@@ -46,6 +48,8 @@ class Cache(object):
             return
         print 'Recreating cache...'
         for id in self.store.list(NS_ARCHIVES):
+
+
             archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, id)))
             self.archives.append(archive['name'])
             for id, sum, csize, osize in archive['chunks']:
@@ -56,7 +60,7 @@ class Cache(object):
         print 'done'
 
     def save(self):
-        assert self.store.state == Store.OPEN
+        assert self.store.state == self.store.OPEN
         print 'saving cache'
         data = {'uuid': self.store.uuid,
                 'chunkmap': self.chunkmap, 'summap': self.summap,
@@ -81,7 +85,7 @@ class Cache(object):
         return self.init_chunk(id, sum, csize, osize)
 
     def init_chunk(self, id, sum, csize, osize):
-        self.chunkmap[id] = (1, sum, osize, csize)
+        self.chunkmap[id] = (1, sum, csize, osize)
         self.summap[sum] = self.summap.get(sum, 0) + 1
         return id, sum, csize, osize
 
@@ -91,7 +95,7 @@ class Cache(object):
 
     def chunk_incref(self, id):
         count, sum, csize, osize = self.chunkmap[id]
-        self.chunkmap[id] = (count + 1, sum, osize, csize)
+        self.chunkmap[id] = (count + 1, sum, csize, osize)
         self.summap[sum] += 1
         return id, sum, csize, osize
 

+ 1 - 6
dedupestore/store.py → dedupestore/fsstore.py

@@ -7,11 +7,6 @@ import unittest
 import uuid
 
 
-CHUNK_SIZE = 256 * 1024
-NS_ARCHIVES = 'ARCHIVES'
-NS_CHUNKS = 'CHUNKS'
-
-
 class Store(object):
     """
     """
@@ -33,7 +28,7 @@ class Store(object):
         if not os.path.exists(path):
             self.create(path)
         self.open(path)
-    
+
     def create(self, path):
         os.mkdir(path)
         open(os.path.join(path, 'version'), 'wb').write(self.VERSION)

+ 157 - 0
dedupestore/sqlitestore.py

@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+import os
+import tempfile
+import shutil
+import unittest
+import sqlite3
+import uuid
+
+
+class SqliteStore(object):
+    """
+    """
+
+    class DoesNotExist(KeyError):
+        """"""
+
+    class AlreadyExists(KeyError):
+        """"""
+
+    IDLE = 'Idle'
+    OPEN = 'Open'
+    ACTIVE = 'Active'
+    VERSION = 'DEDUPESTORE VERSION 1'
+
+    def __init__(self, path):
+        if not os.path.exists(path):
+            self.create(path)
+        self.cnx = sqlite3.connect(path)
+        self.cursor = self.cnx.cursor()
+        self.uuid, self.tid = self.cursor.execute('SELECT uuid, tid FROM system').fetchone()
+        self.state = self.OPEN
+
+    def create(self, path):
+        cnx = sqlite3.connect(path)
+        cnx.execute('PRAGMA auto_vacuum=full')
+        cnx.execute('CREATE TABLE objects(ns TEXT NOT NULL, id NOT NULL, data NOT NULL)')
+        cnx.execute('CREATE TABLE system(uuid NOT NULL, tid NOT NULL)')
+        cnx.execute('INSERT INTO system VALUES(?,?)', (uuid.uuid1().hex, 0))
+        cnx.execute('CREATE UNIQUE INDEX objects_pk ON objects(ns, id)')
+
+    def close(self):
+        self.cnx.close()
+
+    def commit(self):
+        """
+        """
+        self.cursor.execute('UPDATE system SET tid=tid+1')
+        import time
+        t = time.time()
+        self.cnx.commit()
+        print time.time() - t
+        self.tid += 1
+
+    def rollback(self):
+        """
+        """
+        self.cnx.rollback()
+
+    def get(self, ns, id):
+        """
+        """
+        self.cursor.execute('SELECT data FROM objects WHERE ns=? and id=?',
+                            (ns.encode('hex'), id.encode('hex')))
+        row = self.cursor.fetchone()
+        if row:
+            return str(row[0])
+        else:
+            raise self.DoesNotExist
+
+    def put(self, ns, id, data):
+        """
+        """
+        try:
+            self.cursor.execute('INSERT INTO objects (ns, id, data) '
+                                'VALUES(?, ?, ?)',
+                                (ns.encode('hex'), id.encode('hex'),
+                                sqlite3.Binary(data)))
+        except sqlite3.IntegrityError:
+            raise self.AlreadyExists
+
+    def delete(self, ns, id):
+        """
+        """
+        self.cursor.execute('DELETE FROM objects WHERE ns=? AND id=?',
+                           (ns.encode('hex'), id.encode('hex')))
+
+    def list(self, ns, prefix='', marker=None, max_keys=1000000):
+        """
+        """
+        condition = ''
+        if prefix:
+            condition += ' AND id LIKE :prefix'
+        if marker:
+            condition += ' AND id >= :marker'
+        args = dict(ns=ns.encode('hex'), prefix=prefix.encode('hex') + '%',
+                    marker=marker and marker.encode('hex'))
+        for row in self.cursor.execute('SELECT id FROM objects WHERE '
+                                'ns=:ns ' + condition + ' LIMIT ' + str(max_keys),
+                                args):
+            yield row[0].decode('hex')
+
+
+class SqliteStoreTestCase(unittest.TestCase):
+ 
+    def setUp(self):
+        self.tmppath = tempfile.mkdtemp()
+        self.store = SqliteStore(os.path.join(self.tmppath, 'store'))
+
+    def tearDown(self):
+        shutil.rmtree(self.tmppath)
+    
+    def test1(self):
+        self.assertEqual(self.store.tid, 0)
+        self.assertEqual(self.store.state, self.store.OPEN)
+        self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
+        self.assertRaises(self.store.AlreadyExists, lambda: self.store.put('SOMENS', 'SOMEID', 'SOMEDATA'))
+        self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
+        self.store.rollback()
+        self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
+        self.assertEqual(self.store.tid, 0)
+
+    def test2(self):
+        self.assertEqual(self.store.tid, 0)
+        self.assertEqual(self.store.state, self.store.OPEN)
+        self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
+        self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
+        self.store.commit()
+        self.assertEqual(self.store.tid, 1)
+        self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
+        self.store.delete('SOMENS', 'SOMEID')
+        self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
+        self.store.rollback()
+        self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
+        self.store.delete('SOMENS', 'SOMEID')
+        self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
+        self.store.commit()
+        self.assertEqual(self.store.tid, 2)
+        self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
+
+    def test_list(self):
+        self.store.put('SOMENS', 'SOMEID12', 'SOMEDATA')
+        self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
+        self.store.put('SOMENS', 'SOMEID1', 'SOMEDATA')
+        self.store.put('SOMENS', 'SOMEID123', 'SOMEDATA')
+        self.store.commit()
+        self.assertEqual(list(self.store.list('SOMENS', max_keys=3)), 
+            ['SOMEID', 'SOMEID1', 'SOMEID12'])
+        self.assertEqual(list(self.store.list('SOMENS', marker='SOMEID12')), 
+            ['SOMEID12', 'SOMEID123'])
+        self.assertEqual(list(self.store.list('SOMENS', prefix='SOMEID1', max_keys=2)), 
+            ['SOMEID1', 'SOMEID12'])
+        self.assertEqual(list(self.store.list('SOMENS', prefix='SOMEID1', marker='SOMEID12')), 
+            ['SOMEID12', 'SOMEID123'])
+
+
+if __name__ == '__main__':
+    unittest.main()