Browse Source

Merge pull request #7040 from ThomasWaldmann/fix2-scan

repository.scan: use same end_segment within same scan
TW 2 years ago
parent
commit
63f736be4f

+ 2 - 2
src/borg/archive.py

@@ -1747,9 +1747,9 @@ class ArchiveChecker:
         pi = ProgressIndicatorPercent(
             total=chunks_count_index, msg="Verifying data %6.2f%%", step=0.01, msgid="check.verify_data"
         )
-        marker = None
+        state = None
         while True:
-            chunk_ids, marker = self.repository.scan(limit=100, marker=marker)
+            chunk_ids, state = self.repository.scan(limit=100, state=state)
             if not chunk_ids:
                 break
             chunks_count_segments += len(chunk_ids)

+ 4 - 6
src/borg/archiver/debug_cmd.py

@@ -152,12 +152,10 @@ class DebugMixIn:
             cdata = repository.get(ids[0])
             key = key_factory(repository, cdata)
             repo_objs = RepoObj(key)
-            marker = None
+            state = None
             i = 0
             while True:
-                ids, marker = repository.scan(
-                    limit=LIST_SCAN_LIMIT, marker=marker
-                )  # must use on-disk order scanning here
+                ids, state = repository.scan(limit=LIST_SCAN_LIMIT, state=state)  # must use on-disk order scanning here
                 if not ids:
                     break
                 for id in ids:
@@ -203,12 +201,12 @@ class DebugMixIn:
         key = key_factory(repository, cdata)
         repo_objs = RepoObj(key)
 
-        marker = None
+        state = None
         last_data = b""
         last_id = None
         i = 0
         while True:
-            ids, marker = repository.scan(limit=LIST_SCAN_LIMIT, marker=marker)  # must use on-disk order scanning here
+            ids, state = repository.scan(limit=LIST_SCAN_LIMIT, state=state)  # must use on-disk order scanning here
             if not ids:
                 break
             for id in ids:

+ 2 - 2
src/borg/remote.py

@@ -989,8 +989,8 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
     def list(self, limit=None, marker=None, mask=0, value=0):
         """actual remoting is done via self.call in the @api decorator"""
 
-    @api(since=parse_version("1.1.0b3"))
-    def scan(self, limit=None, marker=None):
+    @api(since=parse_version("2.0.0b2"))
+    def scan(self, limit=None, state=None):
         """actual remoting is done via self.call in the @api decorator"""
 
     @api(since=parse_version("2.0.0b2"))

+ 9 - 9
src/borg/repository.py

@@ -1207,15 +1207,15 @@ class Repository:
             self.index = self.open_index(self.get_transaction_id())
         return [id_ for id_, _ in islice(self.index.iteritems(marker=marker, mask=mask, value=value), limit)]
 
-    def scan(self, limit=None, marker=None):
+    def scan(self, limit=None, state=None):
         """
-        list <limit> IDs starting from after <marker> - in on-disk order, so that a client
+        list (the next) <limit> chunk IDs from the repository - in on-disk order, so that a client
         fetching data in this order does linear reads and reuses stuff from disk cache.
 
-        marker can either be None (default, meaning "start from the beginning") or the object
-        returned from a previous scan call (meaning "continue scanning where we stopped previously").
+        state can either be None (initially, when starting to scan) or the object
+        returned from a previous scan call (meaning "continue scanning").
 
-        returns: list of chunk ids, marker
+        returns: list of chunk ids, state
 
         We rely on repository.check() having run already (either now or some time before) and that:
 
@@ -1230,11 +1230,11 @@ class Repository:
         if not self.index:
             self.index = self.open_index(transaction_id)
         # smallest valid seg is <uint32> 0, smallest valid offs is <uint32> 8
-        start_segment, start_offset = marker if marker is not None else (0, 0)
+        start_segment, start_offset, end_segment = state if state is not None else (0, 0, transaction_id)
         ids, segment, offset = [], 0, 0
         # we only scan up to end_segment == transaction_id to only scan **committed** chunks,
         # avoiding scanning into newly written chunks.
-        for segment, filename in self.io.segment_iterator(start_segment, transaction_id):
+        for segment, filename in self.io.segment_iterator(start_segment, end_segment):
             obj_iterator = self.io.iter_objects(segment, start_offset, read_data=False)
             while True:
                 try:
@@ -1255,8 +1255,8 @@ class Repository:
                         # we have found an existing and current object
                         ids.append(id)
                         if len(ids) == limit:
-                            return ids, (segment, offset)
-        return ids, (segment, offset)
+                            return ids, (segment, offset, end_segment)
+        return ids, (segment, offset, end_segment)
 
     def flags(self, id, mask=0xFFFFFFFF, value=None):
         """

+ 2 - 2
src/borg/testsuite/repository.py

@@ -191,10 +191,10 @@ class RepositoryTestCase(RepositoryTestCaseBase):
         self.repository.commit(compact=False)
         all, _ = self.repository.scan()
         assert len(all) == 100
-        first_half, marker = self.repository.scan(limit=50)
+        first_half, state = self.repository.scan(limit=50)
         assert len(first_half) == 50
         assert first_half == all[:50]
-        second_half, _ = self.repository.scan(marker=marker)
+        second_half, _ = self.repository.scan(state=state)
         assert len(second_half) == 50
         assert second_half == all[50:]
         # check result order == on-disk order (which is hash order)