|  | @@ -461,20 +461,11 @@ class HashIndexCompactTestCase(HashIndexDataTestCase):
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def index_from_data(self):
 | 
	
		
			
				|  |  |          self.index_data.seek(0)
 | 
	
		
			
				|  |  | -        index = ChunkIndex.read(self.index_data)
 | 
	
		
			
				|  |  | +        # Since we are trying to carefully control the layout of the hashindex,
 | 
	
		
			
				|  |  | +        # we set permit_compact to prevent hashindex_read from resizing the hash table.
 | 
	
		
			
				|  |  | +        index = ChunkIndex.read(self.index_data, permit_compact=True)
 | 
	
		
			
				|  |  |          return index
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    def index_to_data(self, index):
 | 
	
		
			
				|  |  | -        data = io.BytesIO()
 | 
	
		
			
				|  |  | -        index.write(data)
 | 
	
		
			
				|  |  | -        return data.getvalue()
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    def index_from_data_compact_to_data(self):
 | 
	
		
			
				|  |  | -        index = self.index_from_data()
 | 
	
		
			
				|  |  | -        index.compact()
 | 
	
		
			
				|  |  | -        compact_index = self.index_to_data(index)
 | 
	
		
			
				|  |  | -        return compact_index
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |      def write_entry(self, key, *values):
 | 
	
		
			
				|  |  |          self.index_data.write(key)
 | 
	
		
			
				|  |  |          for value in values:
 | 
	
	
		
			
				|  | @@ -486,87 +477,77 @@ class HashIndexCompactTestCase(HashIndexDataTestCase):
 | 
	
		
			
				|  |  |      def write_deleted(self, key):
 | 
	
		
			
				|  |  |          self.write_entry(key, 0xFFFFFFFE, 0, 0)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    def compare_indexes(self, idx1, idx2):
 | 
	
		
			
				|  |  | +        """Check that the two hash tables contain the same data.  idx1
 | 
	
		
			
				|  |  | +        is allowed to have "mis-filed" entries, because we only need to
 | 
	
		
			
				|  |  | +        iterate over it.  But idx2 needs to support lookup."""
 | 
	
		
			
				|  |  | +        for k, v in idx1.iteritems():
 | 
	
		
			
				|  |  | +            assert v == idx2[k]
 | 
	
		
			
				|  |  | +        assert len(idx1) == len(idx2)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    def compare_compact(self, layout):
 | 
	
		
			
				|  |  | +        """A generic test of a hashindex with the specified layout.  layout should
 | 
	
		
			
				|  |  | +        be a string consisting only of the characters '*' (filled), 'D' (deleted)
 | 
	
		
			
				|  |  | +        and 'E' (empty).
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        num_buckets = len(layout)
 | 
	
		
			
				|  |  | +        num_empty = layout.count("E")
 | 
	
		
			
				|  |  | +        num_entries = layout.count("*")
 | 
	
		
			
				|  |  | +        self.index(num_entries=num_entries, num_buckets=num_buckets, num_empty=num_empty)
 | 
	
		
			
				|  |  | +        k = 0
 | 
	
		
			
				|  |  | +        for c in layout:
 | 
	
		
			
				|  |  | +            if c == "D":
 | 
	
		
			
				|  |  | +                self.write_deleted(H2(k))
 | 
	
		
			
				|  |  | +            elif c == "E":
 | 
	
		
			
				|  |  | +                self.write_empty(H2(k))
 | 
	
		
			
				|  |  | +            else:
 | 
	
		
			
				|  |  | +                assert c == "*"
 | 
	
		
			
				|  |  | +                self.write_entry(H2(k), 3 * k + 1, 3 * k + 2, 3 * k + 3)
 | 
	
		
			
				|  |  | +            k += 1
 | 
	
		
			
				|  |  | +        idx = self.index_from_data()
 | 
	
		
			
				|  |  | +        cpt = self.index_from_data()
 | 
	
		
			
				|  |  | +        cpt.compact()
 | 
	
		
			
				|  |  | +        # Note that idx is not a valid hash table, since the entries are not
 | 
	
		
			
				|  |  | +        # stored where they should be.  So lookups of the form idx[k] can fail.
 | 
	
		
			
				|  |  | +        # But cpt is a valid hash table, since there are no empty buckets.
 | 
	
		
			
				|  |  | +        assert idx.size() == 1024 + num_buckets * (32 + 3 * 4)
 | 
	
		
			
				|  |  | +        assert cpt.size() == 1024 + num_entries * (32 + 3 * 4)
 | 
	
		
			
				|  |  | +        self.compare_indexes(idx, cpt)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      def test_simple(self):
 | 
	
		
			
				|  |  | -        self.index(num_entries=3, num_buckets=6, num_empty=2)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(0), 1, 2, 3)
 | 
	
		
			
				|  |  | -        self.write_deleted(H2(1))
 | 
	
		
			
				|  |  | -        self.write_empty(H2(2))
 | 
	
		
			
				|  |  | -        self.write_entry(H2(3), 5, 6, 7)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(4), 8, 9, 10)
 | 
	
		
			
				|  |  | -        self.write_empty(H2(5))
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        compact_index = self.index_from_data_compact_to_data()
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        self.index(num_entries=3, num_buckets=3, num_empty=0)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(0), 1, 2, 3)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(3), 5, 6, 7)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(4), 8, 9, 10)
 | 
	
		
			
				|  |  | -        assert compact_index == self.index_data.getvalue()
 | 
	
		
			
				|  |  | +        self.compare_compact("*DE**E")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def test_first_empty(self):
 | 
	
		
			
				|  |  | -        self.index(num_entries=3, num_buckets=6, num_empty=2)
 | 
	
		
			
				|  |  | -        self.write_deleted(H2(1))
 | 
	
		
			
				|  |  | -        self.write_entry(H2(0), 1, 2, 3)
 | 
	
		
			
				|  |  | -        self.write_empty(H2(2))
 | 
	
		
			
				|  |  | -        self.write_entry(H2(3), 5, 6, 7)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(4), 8, 9, 10)
 | 
	
		
			
				|  |  | -        self.write_empty(H2(5))
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        compact_index = self.index_from_data_compact_to_data()
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        self.index(num_entries=3, num_buckets=3, num_empty=0)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(0), 1, 2, 3)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(3), 5, 6, 7)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(4), 8, 9, 10)
 | 
	
		
			
				|  |  | -        assert compact_index == self.index_data.getvalue()
 | 
	
		
			
				|  |  | +        self.compare_compact("D*E**E")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def test_last_used(self):
 | 
	
		
			
				|  |  | -        self.index(num_entries=3, num_buckets=6, num_empty=2)
 | 
	
		
			
				|  |  | -        self.write_deleted(H2(1))
 | 
	
		
			
				|  |  | -        self.write_entry(H2(0), 1, 2, 3)
 | 
	
		
			
				|  |  | -        self.write_empty(H2(2))
 | 
	
		
			
				|  |  | -        self.write_entry(H2(3), 5, 6, 7)
 | 
	
		
			
				|  |  | -        self.write_empty(H2(5))
 | 
	
		
			
				|  |  | -        self.write_entry(H2(4), 8, 9, 10)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        compact_index = self.index_from_data_compact_to_data()
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        self.index(num_entries=3, num_buckets=3, num_empty=0)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(0), 1, 2, 3)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(3), 5, 6, 7)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(4), 8, 9, 10)
 | 
	
		
			
				|  |  | -        assert compact_index == self.index_data.getvalue()
 | 
	
		
			
				|  |  | +        self.compare_compact("D*E*E*")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def test_too_few_empty_slots(self):
 | 
	
		
			
				|  |  | -        self.index(num_entries=3, num_buckets=6, num_empty=2)
 | 
	
		
			
				|  |  | -        self.write_deleted(H2(1))
 | 
	
		
			
				|  |  | -        self.write_entry(H2(0), 1, 2, 3)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(3), 5, 6, 7)
 | 
	
		
			
				|  |  | -        self.write_empty(H2(2))
 | 
	
		
			
				|  |  | -        self.write_empty(H2(5))
 | 
	
		
			
				|  |  | -        self.write_entry(H2(4), 8, 9, 10)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        compact_index = self.index_from_data_compact_to_data()
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        self.index(num_entries=3, num_buckets=3, num_empty=0)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(0), 1, 2, 3)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(3), 5, 6, 7)
 | 
	
		
			
				|  |  | -        self.write_entry(H2(4), 8, 9, 10)
 | 
	
		
			
				|  |  | -        assert compact_index == self.index_data.getvalue()
 | 
	
		
			
				|  |  | +        self.compare_compact("D**EE*")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def test_empty(self):
 | 
	
		
			
				|  |  | -        self.index(num_entries=0, num_buckets=6, num_empty=3)
 | 
	
		
			
				|  |  | -        self.write_deleted(H2(1))
 | 
	
		
			
				|  |  | -        self.write_empty(H2(0))
 | 
	
		
			
				|  |  | -        self.write_deleted(H2(3))
 | 
	
		
			
				|  |  | -        self.write_empty(H2(2))
 | 
	
		
			
				|  |  | -        self.write_empty(H2(5))
 | 
	
		
			
				|  |  | -        self.write_deleted(H2(4))
 | 
	
		
			
				|  |  | +        self.compare_compact("DEDEED")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -        compact_index = self.index_from_data_compact_to_data()
 | 
	
		
			
				|  |  | +    def test_num_buckets_zero(self):
 | 
	
		
			
				|  |  | +        self.compare_compact("")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -        self.index(num_entries=0, num_buckets=0, num_empty=0)
 | 
	
		
			
				|  |  | -        assert compact_index == self.index_data.getvalue()
 | 
	
		
			
				|  |  | +    def test_already_compact(self):
 | 
	
		
			
				|  |  | +        self.compare_compact("***")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    def test_all_at_front(self):
 | 
	
		
			
				|  |  | +        self.compare_compact("*DEEED")
 | 
	
		
			
				|  |  | +        self.compare_compact("**DEED")
 | 
	
		
			
				|  |  | +        self.compare_compact("***EED")
 | 
	
		
			
				|  |  | +        self.compare_compact("****ED")
 | 
	
		
			
				|  |  | +        self.compare_compact("*****D")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    def test_all_at_back(self):
 | 
	
		
			
				|  |  | +        self.compare_compact("EDEEE*")
 | 
	
		
			
				|  |  | +        self.compare_compact("DEDE**")
 | 
	
		
			
				|  |  | +        self.compare_compact("DED***")
 | 
	
		
			
				|  |  | +        self.compare_compact("ED****")
 | 
	
		
			
				|  |  | +        self.compare_compact("D*****")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def test_merge(self):
 | 
	
		
			
				|  |  |          master = ChunkIndex()
 | 
	
	
		
			
				|  | @@ -576,11 +557,8 @@ class HashIndexCompactTestCase(HashIndexDataTestCase):
 | 
	
		
			
				|  |  |          idx1[H(3)] = 3, 300
 | 
	
		
			
				|  |  |          idx1.compact()
 | 
	
		
			
				|  |  |          assert idx1.size() == 1024 + 3 * (32 + 2 * 4)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |          master.merge(idx1)
 | 
	
		
			
				|  |  | -        assert master[H(1)] == (1, 100)
 | 
	
		
			
				|  |  | -        assert master[H(2)] == (2, 200)
 | 
	
		
			
				|  |  | -        assert master[H(3)] == (3, 300)
 | 
	
		
			
				|  |  | +        self.compare_indexes(idx1, master)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  class NSIndexTestCase(BaseTestCase):
 |