|
@@ -1217,61 +1217,6 @@ class Repository:
|
|
|
self.index = self.open_index(self.get_transaction_id())
|
|
|
return [id_ for id_, _ in islice(self.index.iteritems(marker=marker, mask=mask, value=value), limit)]
|
|
|
|
|
|
- def scan(self, limit=None, state=None):
|
|
|
- """
|
|
|
- list (the next) <limit> chunk IDs from the repository - in on-disk order, so that a client
|
|
|
- fetching data in this order does linear reads and reuses stuff from disk cache.
|
|
|
-
|
|
|
- state can either be None (initially, when starting to scan) or the object
|
|
|
- returned from a previous scan call (meaning "continue scanning").
|
|
|
-
|
|
|
- returns: list of chunk ids, state
|
|
|
-
|
|
|
- We rely on repository.check() has run already (either now or some time before) and that:
|
|
|
-
|
|
|
- - if we are called from a borg check command, self.index is a valid, fresh, in-sync repo index.
|
|
|
- - if we are called from elsewhere, either self.index or the on-disk index is valid and in-sync.
|
|
|
- - the repository segments are valid (no CRC errors).
|
|
|
- if we encounter CRC errors in segment entry headers, rest of segment is skipped.
|
|
|
- """
|
|
|
- if limit is not None and limit < 1:
|
|
|
- raise ValueError("please use limit > 0 or limit = None")
|
|
|
- transaction_id = self.get_transaction_id()
|
|
|
- if not self.index:
|
|
|
- self.index = self.open_index(transaction_id)
|
|
|
- # smallest valid seg is <uint32> 0, smallest valid offs is <uint32> 8
|
|
|
- start_segment, start_offset, end_segment = state if state is not None else (0, 0, transaction_id)
|
|
|
- ids, segment, offset = [], 0, 0
|
|
|
- # we only scan up to end_segment == transaction_id to scan only **committed** chunks,
|
|
|
- # avoiding scanning into newly written chunks.
|
|
|
- for segment, filename in self.io.segment_iterator(start_segment, end_segment):
|
|
|
- # the start_offset we potentially got from state is only valid for the start_segment we also got
|
|
|
- # from there. in case the segment file vanished meanwhile, the segment_iterator might never
|
|
|
- # return a segment/filename corresponding to the start_segment and we must start from offset 0 then.
|
|
|
- start_offset = start_offset if segment == start_segment else 0
|
|
|
- obj_iterator = self.io.iter_objects(segment, start_offset, read_data=False)
|
|
|
- while True:
|
|
|
- try:
|
|
|
- tag, id, offset, size, _ = next(obj_iterator)
|
|
|
- except (StopIteration, IntegrityError):
|
|
|
- # either end-of-segment or an error - we can not seek to objects at
|
|
|
- # higher offsets than one that has an error in the header fields.
|
|
|
- break
|
|
|
- if start_offset > 0:
|
|
|
- # we are using a state != None and it points to the last object we have already
|
|
|
- # returned in the previous scan() call - thus, we need to skip this one object.
|
|
|
- # also, for the next segment, we need to start at offset 0.
|
|
|
- start_offset = 0
|
|
|
- continue
|
|
|
- if tag in (TAG_PUT2, TAG_PUT):
|
|
|
- in_index = self.index.get(id)
|
|
|
- if in_index and (in_index.segment, in_index.offset) == (segment, offset):
|
|
|
- # we have found an existing and current object
|
|
|
- ids.append(id)
|
|
|
- if len(ids) == limit:
|
|
|
- return ids, (segment, offset, end_segment)
|
|
|
- return ids, (segment, offset, end_segment)
|
|
|
-
|
|
|
def flags(self, id, mask=0xFFFFFFFF, value=None):
|
|
|
"""
|
|
|
query and optionally set flags
|
|
@@ -1625,7 +1570,7 @@ class LoggedIO:
|
|
|
fd.seek(0)
|
|
|
return fd.read(MAGIC_LEN)
|
|
|
|
|
|
- def iter_objects(self, segment, offset=0, read_data=True):
|
|
|
+ def iter_objects(self, segment, read_data=True):
|
|
|
"""
|
|
|
Return object iterator for *segment*.
|
|
|
|
|
@@ -1634,14 +1579,11 @@ class LoggedIO:
|
|
|
The iterator returns five-tuples of (tag, key, offset, size, data).
|
|
|
"""
|
|
|
fd = self.get_fd(segment)
|
|
|
+ offset = 0
|
|
|
fd.seek(offset)
|
|
|
- if offset == 0:
|
|
|
- # we are touching this segment for the first time, check the MAGIC.
|
|
|
- # Repository.scan() calls us with segment > 0 when it continues an ongoing iteration
|
|
|
- # from a marker position - but then we have checked the magic before already.
|
|
|
- if fd.read(MAGIC_LEN) != MAGIC:
|
|
|
- raise IntegrityError(f"Invalid segment magic [segment {segment}, offset {0}]")
|
|
|
- offset = MAGIC_LEN
|
|
|
+ if fd.read(MAGIC_LEN) != MAGIC:
|
|
|
+ raise IntegrityError(f"Invalid segment magic [segment {segment}, offset {offset}]")
|
|
|
+ offset = MAGIC_LEN
|
|
|
header = fd.read(self.header_fmt.size)
|
|
|
while header:
|
|
|
size, tag, key, data = self._read(
|