archive.py
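
# Tests for borg.archive helpers: Statistics and its progress output,
# archive timestamp parsing, CacheChunkBuffer, RobustUnpacker,
# valid_msgpacked_dict, backup_io/backup_io_iter and get_item_uid_gid.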

import json
from collections import OrderedDict
from datetime import datetime, timezone
from io import StringIO
from unittest.mock import Mock

import pytest

from . import BaseTestCase
from ..crypto.key import PlaintextKey
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics
from ..archive import BackupOSError, backup_io, backup_io_iter, get_item_uid_gid
from ..helpers import Manifest
from ..helpers import msgpack
from ..item import Item, ArchiveItem
from ..platform import uid2user, gid2group


@pytest.fixture()
def stats():
    stats = Statistics()
    stats.update(20, 10, unique=True)
    return stats
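

# update() always adds to osize (original) and csize (compressed), but
# usize (deduplicated) only grows for unique chunks (unique=True).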
def test_stats_basic(stats):
    assert stats.osize == 20
    assert stats.csize == stats.usize == 10
    stats.update(20, 10, unique=False)
    assert stats.osize == 40
    assert stats.csize == 20
    assert stats.usize == 10
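

# on a tty, show_progress() pads the status line to COLUMNS width, truncates
# long paths and terminates with "\r" so the line gets overwritten in place.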
def test_stats_progress_tty(stats, monkeypatch, columns=80):
    class TTYStringIO(StringIO):
        def isatty(self):
            return True

    monkeypatch.setenv('COLUMNS', str(columns))
    out = TTYStringIO()
    stats.show_progress(stream=out)
    s = '20 B O 10 B C 10 B D 0 N '
    buf = ' ' * (columns - len(s))
    assert out.getvalue() == s + buf + "\r"

    out = TTYStringIO()
    stats.update(10**3, 0, unique=False)
    stats.show_progress(item=Item(path='foo'), final=False, stream=out)
    s = '1.02 kB O 10 B C 10 B D 0 N foo'
    buf = ' ' * (columns - len(s))
    assert out.getvalue() == s + buf + "\r"

    out = TTYStringIO()
    stats.show_progress(item=Item(path='foo' * 40), final=False, stream=out)
    s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo'
    buf = ' ' * (columns - len(s))
    assert out.getvalue() == s + buf + "\r"


# on a non-tty stream, each call writes one plain, untruncated line ended
# with "\n" instead.
def test_stats_progress_file(stats, monkeypatch):
    out = StringIO()
    stats.show_progress(stream=out)
    s = '20 B O 10 B C 10 B D 0 N '
    assert out.getvalue() == s + "\n"

    out = StringIO()
    stats.update(10**3, 0, unique=False)
    path = 'foo'
    stats.show_progress(item=Item(path=path), final=False, stream=out)
    s = f'1.02 kB O 10 B C 10 B D 0 N {path}'
    assert out.getvalue() == s + "\n"

    out = StringIO()
    path = 'foo' * 40
    stats.show_progress(item=Item(path=path), final=False, stream=out)
    s = f'1.02 kB O 10 B C 10 B D 0 N {path}'
    assert out.getvalue() == s + "\n"


def test_stats_format(stats):
    assert str(stats) == """\
This archive:                   20 B                 10 B                 10 B"""
    s = f"{stats.osize_fmt}"
    assert s == "20 B"
    # kind of redundant, but id is variable so we can't match reliably
    assert repr(stats) == f'<Statistics object at {id(stats):#x} (20, 10, 10)>'


def test_stats_progress_json(stats):
    stats.output_json = True

    out = StringIO()
    stats.show_progress(item=Item(path='foo'), stream=out)
    result = json.loads(out.getvalue())
    assert result['type'] == 'archive_progress'
    assert isinstance(result['time'], float)
    assert result['finished'] is False
    assert result['path'] == 'foo'
    assert result['original_size'] == 20
    assert result['compressed_size'] == 10
    assert result['deduplicated_size'] == 10
    assert result['nfiles'] == 0  # this counter gets updated elsewhere

    out = StringIO()
    stats.show_progress(stream=out, final=True)
    result = json.loads(out.getvalue())
    assert result['type'] == 'archive_progress'
    assert isinstance(result['time'], float)
    assert result['finished'] is True  # see #6570
    assert 'path' not in result
    assert 'original_size' not in result
    assert 'compressed_size' not in result
    assert 'deduplicated_size' not in result
    assert 'nfiles' not in result
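

# minimal stand-in for the cache (and its repository) as needed by
# CacheChunkBuffer: chunks go into a plain dict, add_chunk() returns the
# usual (id, size, csize) triple.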
class MockCache:

    class MockRepo:
        def async_response(self, wait=True):
            pass

    def __init__(self):
        self.objects = {}
        self.repository = self.MockRepo()

    def add_chunk(self, id, chunk, stats=None, wait=True):
        self.objects[id] = chunk
        return id, len(chunk), len(chunk)
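

# Archive.ts must parse the metadata time string into a timezone-aware UTC
# datetime, with or without a microseconds part.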
class ArchiveTimestampTestCase(BaseTestCase):

    def _test_timestamp_parsing(self, isoformat, expected):
        repository = Mock()
        key = PlaintextKey(repository)
        manifest = Manifest(repository, key)
        a = Archive(repository, key, manifest, 'test', create=True)
        a.metadata = ArchiveItem(time=isoformat)
        self.assert_equal(a.ts, expected)

    def test_with_microseconds(self):
        self._test_timestamp_parsing(
            '1970-01-01T00:00:01.000001',
            datetime(1970, 1, 1, 0, 0, 1, 1, timezone.utc))

    def test_without_microseconds(self):
        self._test_timestamp_parsing(
            '1970-01-01T00:00:01',
            datetime(1970, 1, 1, 0, 0, 1, 0, timezone.utc))


class ChunkBufferTestCase(BaseTestCase):

    def test(self):
        data = [Item(path='p1'), Item(path='p2')]
        cache = MockCache()
        key = PlaintextKey(None)
        chunks = CacheChunkBuffer(cache, key, None)
        for d in data:
            chunks.add(d)
        chunks.flush()
        chunks.flush(flush=True)
        self.assert_equal(len(chunks.chunks), 2)
        unpacker = msgpack.Unpacker()
        for id in chunks.chunks:
            unpacker.feed(cache.objects[id])
        self.assert_equal(data, [Item(internal_dict=d) for d in unpacker])

    def test_partial(self):
        big = "0123456789abcdefghijklmnopqrstuvwxyz" * 25000
        data = [Item(path='full', source=big), Item(path='partial', source=big)]
        cache = MockCache()
        key = PlaintextKey(None)
        chunks = CacheChunkBuffer(cache, key, None)
        for d in data:
            chunks.add(d)
        chunks.flush(flush=False)
        # the code is expected to leave the last partial chunk in the buffer
        self.assert_equal(len(chunks.chunks), 3)
        assert chunks.buffer.tell() > 0
        # now really flush
        chunks.flush(flush=True)
        self.assert_equal(len(chunks.chunks), 4)
        assert chunks.buffer.tell() == 0
        unpacker = msgpack.Unpacker()
        for id in chunks.chunks:
            unpacker.feed(cache.objects[id])
        self.assert_equal(data, [Item(internal_dict=d) for d in unpacker])
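

# RobustUnpacker reads a stream of msgpacked items that may be damaged:
# after resync(), it scans forward to the next position that validates as an
# item dict; without a resync, garbage bytes just fall through as single
# integers (see test_extra_garbage_no_sync).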
class RobustUnpackerTestCase(BaseTestCase):

    def make_chunks(self, items):
        return b''.join(msgpack.packb({'path': item}) for item in items)

    def _validator(self, value):
        return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')

    def process(self, input):
        unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
        result = []
        for should_sync, chunks in input:
            if should_sync:
                unpacker.resync()
            for data in chunks:
                unpacker.feed(data)
                for item in unpacker:
                    result.append(item)
        return result

    def test_extra_garbage_no_sync(self):
        chunks = [(False, [self.make_chunks([b'foo', b'bar'])]),
                  (False, [b'garbage'] + [self.make_chunks([b'boo', b'baz'])])]
        result = self.process(chunks)
        self.assert_equal(result, [
            {b'path': b'foo'}, {b'path': b'bar'},
            103, 97, 114, 98, 97, 103, 101,
            {b'path': b'boo'},
            {b'path': b'baz'}])

    def split(self, left, length):
        parts = []
        while left:
            parts.append(left[:length])
            left = left[length:]
        return parts

    def test_correct_stream(self):
        chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 2)
        input = [(False, chunks)]
        result = self.process(input)
        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'bar'}, {b'path': b'boo'}, {b'path': b'baz'}])

    def test_missing_chunk(self):
        chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4)
        input = [(False, chunks[:3]), (True, chunks[4:])]
        result = self.process(input)
        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])

    def test_corrupt_chunk(self):
        chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4)
        input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
        result = self.process(input)
        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
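

# valid_msgpacked_dict() is a plausibility check used when scanning damaged
# data: the bytes must look like a msgpacked dict keyed by known (serialized)
# item keys - scalars, other containers and plain garbage must all fail.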
@pytest.fixture
def item_keys_serialized():
    return [msgpack.packb(name) for name in ITEM_KEYS]


@pytest.mark.parametrize('packed',
                         [b'', b'x', b'foobar', ] +
                         [msgpack.packb(o) for o in (
                             [None, 0, 0.0, False, '', {}, [], ()] +
                             [42, 23.42, True, b'foobar', {b'foo': b'bar'}, [b'foo', b'bar'], (b'foo', b'bar')]
                         )])
def test_invalid_msgpacked_item(packed, item_keys_serialized):
    assert not valid_msgpacked_dict(packed, item_keys_serialized)


# pytest-xdist requires always same order for the keys and dicts:
IK = sorted(list(ITEM_KEYS))


@pytest.mark.parametrize('packed',
                         [msgpack.packb(o) for o in [
                             {b'path': b'/a/b/c'},  # small (different msgpack mapping type!)
                             OrderedDict((k, b'') for k in IK),  # as big (key count) as it gets
                             OrderedDict((k, b'x' * 1000) for k in IK),  # as big (key count and volume) as it gets
                         ]],
                         ids=["minimal", "empty-values", "long-values"])
def test_valid_msgpacked_items(packed, item_keys_serialized):
    assert valid_msgpacked_dict(packed, item_keys_serialized)


def test_key_length_msgpacked_items():
    key = b'x' * 32  # 31 bytes is the limit for fixstr msgpack type
    data = {key: b''}
    item_keys_serialized = [msgpack.packb(key), ]
    assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized)
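

# backup_io is a context manager that converts OSError raised inside the
# block into BackupOSError.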
def test_backup_io():
    with pytest.raises(BackupOSError):
        with backup_io:
            raise OSError(123)
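

# backup_io_iter() gives the same OSError -> BackupOSError treatment to an
# iterator: errors from __next__ are converted, while StopIteration still
# ends the iteration normally.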
def test_backup_io_iter():
    class Iterator:
        def __init__(self, exc):
            self.exc = exc

        def __next__(self):
            raise self.exc()

    oserror_iterator = Iterator(OSError)
    with pytest.raises(BackupOSError):
        for _ in backup_io_iter(oserror_iterator):
            pass

    normal_iterator = Iterator(StopIteration)
    for _ in backup_io_iter(normal_iterator):
        assert False, 'StopIteration handled incorrectly'
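

# get_item_uid_gid() decides which uid/gid to use for an item: forced values
# win, then (for numeric=False) a user/group name lookup, then the item's own
# ids if valid, and finally the given defaults.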
def test_get_item_uid_gid():
    # test requires that:
    # - a name for user 0 and group 0 exists, usually root:root or root:wheel.
    # - a system user/group udoesnotexist:gdoesnotexist does NOT exist.

    user0, group0 = uid2user(0), gid2group(0)

    # this is intentionally a "strange" item, with non-matching ids/names.
    item = Item(path='filename', uid=1, gid=2, user=user0, group=group0)

    uid, gid = get_item_uid_gid(item, numeric=False)
    # these are found via a name-to-id lookup
    assert uid == 0
    assert gid == 0

    uid, gid = get_item_uid_gid(item, numeric=True)
    # these are taken directly from item.uid and item.gid
    assert uid == 1
    assert gid == 2

    uid, gid = get_item_uid_gid(item, numeric=False, uid_forced=3, gid_forced=4)
    # these are enforced (not from item metadata)
    assert uid == 3
    assert gid == 4

    # item metadata broken, has negative ids.
    item = Item(path='filename', uid=-1, gid=-2, user=user0, group=group0)

    uid, gid = get_item_uid_gid(item, numeric=True)
    # use the uid/gid defaults (which both default to 0).
    assert uid == 0
    assert gid == 0

    uid, gid = get_item_uid_gid(item, numeric=True, uid_default=5, gid_default=6)
    # use the uid/gid defaults (as given).
    assert uid == 5
    assert gid == 6

    # item metadata broken, has negative ids and non-existing user/group names.
    item = Item(path='filename', uid=-3, gid=-4, user='udoesnotexist', group='gdoesnotexist')

    uid, gid = get_item_uid_gid(item, numeric=False)
    # use the uid/gid defaults (which both default to 0).
    assert uid == 0
    assert gid == 0

    uid, gid = get_item_uid_gid(item, numeric=True, uid_default=7, gid_default=8)
    # use the uid/gid defaults (as given).
    assert uid == 7
    assert gid == 8

    # item metadata has valid uid/gid, but non-existing user/group names.
    item = Item(path='filename', uid=9, gid=10, user='udoesnotexist', group='gdoesnotexist')

    uid, gid = get_item_uid_gid(item, numeric=False)
    # because the user/group names do not exist here, use the valid numeric ids from item metadata.
    assert uid == 9
    assert gid == 10

    uid, gid = get_item_uid_gid(item, numeric=False, uid_default=11, gid_default=12)
    # because the item uid/gid seem valid, do not use the given uid/gid defaults.
    assert uid == 9
    assert gid == 10