@@ -391,6 +391,38 @@ def get_item_uid_gid(item, *, numeric, uid_forced=None, gid_forced=None, uid_def
     return uid, gid
 
 
+def archive_get_items(metadata, key, repository):
+    if "item_ptrs" in metadata:  # looks like a v2+ archive
+        assert "items" not in metadata
+        items = []
+        for id, data in zip(metadata.item_ptrs, repository.get_many(metadata.item_ptrs)):
+            data = key.decrypt(id, data)
+            ids = msgpack.unpackb(data)
+            items.extend(ids)
+        return items
+
+    if "items" in metadata:  # legacy, v1 archive
+        assert "item_ptrs" not in metadata
+        return metadata.items
+
+
+def archive_put_items(chunk_ids, *, key, cache=None, stats=None, add_reference=None):
+    """gets a (potentially large) list of archive metadata stream chunk ids and writes them to repo objects"""
+    item_ptrs = []
+    for i in range(0, len(chunk_ids), IDS_PER_CHUNK):
+        data = msgpack.packb(chunk_ids[i : i + IDS_PER_CHUNK])
+        id = key.id_hash(data)
+        if cache is not None and stats is not None:
+            cache.add_chunk(id, data, stats)
+        elif add_reference is not None:
+            cdata = key.encrypt(id, data)
+            add_reference(id, len(data), cdata)
+        else:
+            raise NotImplementedError
+        item_ptrs.append(id)
+    return item_ptrs
+
+
 class Archive:
     class DoesNotExist(Error):
         """Archive {} does not exist"""
@@ -479,6 +511,8 @@ class Archive:
         metadata = ArchiveItem(internal_dict=msgpack.unpackb(data))
         if metadata.version not in (1, 2):  # legacy: still need to read v1 archives
             raise Exception("Unknown archive metadata version")
+        # note: metadata.items must not get written to disk!
+        metadata.items = archive_get_items(metadata, self.key, self.repository)
         return metadata
 
     def load(self, id):
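`_load_meta` resolves the pointer blocks immediately, so the rest of the code can keep using `metadata.items` as before; the resolved list exists only in memory, hence the comment. A sketch of the v2 load path, using hypothetical stand-ins (`PlainKey`, `DictRepo`) for borg's key and repository:

```python
import hashlib
import msgpack


class PlainKey:
    """stand-in for borg's key: hashes ids, pretends to decrypt"""
    def id_hash(self, data):
        return hashlib.sha256(data).digest()

    def decrypt(self, id, data):
        return data  # a real key would authenticate and decrypt here


class DictRepo:
    """stand-in for borg's repository"""
    def __init__(self):
        self.objects = {}

    def get_many(self, ids):
        return [self.objects[id] for id in ids]


key, repository = PlainKey(), DictRepo()
ids = [b"a" * 32, b"b" * 32]
block = msgpack.packb(ids)
ptr = key.id_hash(block)
repository.objects[ptr] = block

# what archive_get_items does for a v2 archive:
items = []
for id, data in zip([ptr], repository.get_many([ptr])):
    items.extend(msgpack.unpackb(key.decrypt(id, data)))
assert items == ids
```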
@@ -512,10 +546,6 @@ class Archive:
     def duration_from_meta(self):
         return format_timedelta(self.ts_end - self.ts)
 
-    def _archive_csize(self):
-        cdata = self.repository.get(self.id)
-        return len(cdata)
-
     def info(self):
         if self.create:
             stats = self.stats
@@ -532,7 +562,6 @@ class Archive:
             "end": OutputTimestamp(end),
             "duration": (end - start).total_seconds(),
             "stats": stats.as_dict(),
-            "limits": {"max_archive_size": self._archive_csize() / MAX_DATA_SIZE},
         }
         if self.create:
             info["command_line"] = sys.argv
@@ -556,12 +585,10 @@ Archive fingerprint: {0.fpr}
 Time (start): {start}
 Time (end):   {end}
 Duration: {0.duration}
-Utilization of max. archive size: {csize_max:.0%}
 """.format(
             self,
             start=OutputTimestamp(self.start.replace(tzinfo=timezone.utc)),
             end=OutputTimestamp(self.end.replace(tzinfo=timezone.utc)),
-            csize_max=self._archive_csize() / MAX_DATA_SIZE,
             location=self.repository._location.canonical_path(),
         )
 
@@ -599,6 +626,7 @@ Utilization of max. archive size: {csize_max:.0%}
         if name in self.manifest.archives:
             raise self.AlreadyExists(name)
         self.items_buffer.flush(flush=True)
+        item_ptrs = archive_put_items(self.items_buffer.chunks, key=self.key, cache=self.cache, stats=self.stats)
         duration = timedelta(seconds=time.monotonic() - self.start_monotonic)
         if timestamp is None:
             end = datetime.utcnow()
@@ -612,7 +640,7 @@ Utilization of max. archive size: {csize_max:.0%}
             "version": 2,
             "name": name,
             "comment": comment or "",
-            "items": self.items_buffer.chunks,
+            "item_ptrs": item_ptrs,  # see #1473
             "cmdline": sys.argv,
             "hostname": hostname,
             "username": getuser(),
@@ -930,6 +958,8 @@ Utilization of max. archive size: {csize_max:.0%}
     def set_meta(self, key, value):
         metadata = self._load_meta(self.id)
         setattr(metadata, key, value)
+        if "items" in metadata:
+            del metadata.items
         data = msgpack.packb(metadata.as_dict())
         new_id = self.key.id_hash(data)
         self.cache.add_chunk(new_id, data, self.stats)
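Because `set_meta` round-trips through `_load_meta`, which injects the in-memory `items` list, that list has to be stripped again before packing; otherwise a rewritten v2 archive object would hit disk carrying both `items` and `item_ptrs`. The invariant, sketched on a plain dict:

```python
import msgpack

# after _load_meta, a v2 archive carries both fields in memory:
metadata = {"version": 2, "item_ptrs": [b"p" * 32], "items": [b"a" * 32]}

# what the `del metadata.items` above achieves before re-serializing:
metadata.pop("items", None)
assert "items" not in metadata
packed = msgpack.packb(metadata)  # safe: only item_ptrs is persisted
```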
@@ -1004,6 +1034,11 @@ Utilization of max. archive size: {csize_max:.0%}
             if forced == 0:
                 raise
             error = True
+
+        # delete the blocks that store all the references that end up being loaded into metadata.items:
+        for id in self.metadata.item_ptrs:
+            chunk_decref(id, stats)
+
         # in forced delete mode, we try hard to delete at least the manifest entry,
         # if possible also the archive superblock, even if processing the items raises
         # some harmless exception.
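The pointer blocks are ordinary repo objects with their own refcounts, so deleting an archive now has to decref them in addition to the metadata stream chunks they point to. A toy refcount model of that extra step (names and values assumed; borg's real `chunk_decref` works against the cache and chunk index):

```python
# toy refcount store, illustration only
refcounts = {b"ptr-block-1": 1, b"stream-chunk-1": 2}


def chunk_decref(id, stats=None):
    refcounts[id] -= 1
    if refcounts[id] == 0:
        del refcounts[id]  # last reference gone: object leaves the repo


for id in [b"ptr-block-1"]:  # stands in for metadata.item_ptrs
    chunk_decref(id)
assert b"ptr-block-1" not in refcounts      # pointer block reclaimed
assert refcounts[b"stream-chunk-1"] == 2    # stream chunks decref'd elsewhere
```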
@@ -1997,7 +2032,8 @@ class ArchiveChecker:
                 return True, ""
 
             i = 0
-            for state, items in groupby(archive.items, missing_chunk_detector):
+            archive_items = archive_get_items(archive, self.key, repository)
+            for state, items in groupby(archive_items, missing_chunk_detector):
                 items = list(items)
                 if state % 2:
                     for chunk_id in items:
@@ -2078,9 +2114,11 @@ class ArchiveChecker:
                         verify_file_chunks(info.name, item)
                     items_buffer.add(item)
                 items_buffer.flush(flush=True)
-                for previous_item_id in archive.items:
+                for previous_item_id in archive_get_items(archive, self.key, self.repository):
                     mark_as_possibly_superseded(previous_item_id)
-                archive.items = items_buffer.chunks
+                for previous_item_ptr in archive.item_ptrs:
+                    mark_as_possibly_superseded(previous_item_ptr)
+                archive.item_ptrs = archive_put_items(items_buffer.chunks, key=self.key, add_reference=add_reference)
                 data = msgpack.packb(archive.as_dict())
                 new_archive_id = self.key.id_hash(data)
                 cdata = self.key.encrypt(new_archive_id, data)
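Unlike `save()`, the checker has no `Cache`/`stats` pair, so it takes the `add_reference` branch of `archive_put_items` and writes the encrypted pointer blocks directly. A simplified sketch of that branch with a hypothetical `add_reference` callable (encryption elided, small `ids_per_chunk` so the split is visible):

```python
import hashlib
import msgpack

written = {}


def add_reference(id, size, cdata):
    # in borg this also records the chunk in the checker's index;
    # here it just emulates a repository.put(id, cdata)
    written[id] = cdata


def put_items(chunk_ids, ids_per_chunk=2):
    """the add_reference branch of archive_put_items, simplified"""
    item_ptrs = []
    for i in range(0, len(chunk_ids), ids_per_chunk):
        data = msgpack.packb(chunk_ids[i : i + ids_per_chunk])
        id = hashlib.sha256(data).digest()
        add_reference(id, len(data), data)  # cdata == data: no encryption here
        item_ptrs.append(id)
    return item_ptrs


ptrs = put_items([b"a" * 32, b"b" * 32, b"c" * 32])
assert len(ptrs) == 2 and all(p in written for p in ptrs)
```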