archiver.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. import os
  2. from io import StringIO
  3. import stat
  4. import subprocess
  5. import sys
  6. import shutil
  7. import tempfile
  8. import time
  9. import unittest
  10. from hashlib import sha256
  11. from attic import xattr
  12. from attic.archive import Archive, ChunkBuffer
  13. from attic.archiver import Archiver
  14. from attic.crypto import bytes_to_long, num_aes_blocks
  15. from attic.helpers import Manifest
  16. from attic.key import parser
  17. from attic.remote import RemoteRepository, PathNotAllowed
  18. from attic.repository import Repository
  19. from attic.testsuite import AtticTestCase
  20. from attic.testsuite.mock import patch
  21. try:
  22. import llfuse
  23. has_llfuse = True
  24. except ImportError:
  25. has_llfuse = False
  26. has_lchflags = hasattr(os, 'lchflags')
  27. src_dir = os.path.join(os.getcwd(), os.path.dirname(__file__), '..')
  28. class changedir:
  29. def __init__(self, dir):
  30. self.dir = dir
  31. def __enter__(self):
  32. self.old = os.getcwd()
  33. os.chdir(self.dir)
  34. def __exit__(self, *args, **kw):
  35. os.chdir(self.old)
  36. class ArchiverTestCaseBase(AtticTestCase):
  37. prefix = ''
  38. def setUp(self):
  39. os.environ['ATTIC_CHECK_I_KNOW_WHAT_I_AM_DOING'] = '1'
  40. self.archiver = Archiver()
  41. self.tmpdir = tempfile.mkdtemp()
  42. self.repository_path = os.path.join(self.tmpdir, 'repository')
  43. self.repository_location = self.prefix + self.repository_path
  44. self.input_path = os.path.join(self.tmpdir, 'input')
  45. self.output_path = os.path.join(self.tmpdir, 'output')
  46. self.keys_path = os.path.join(self.tmpdir, 'keys')
  47. self.cache_path = os.path.join(self.tmpdir, 'cache')
  48. self.exclude_file_path = os.path.join(self.tmpdir, 'excludes')
  49. os.environ['ATTIC_KEYS_DIR'] = self.keys_path
  50. os.environ['ATTIC_CACHE_DIR'] = self.cache_path
  51. os.mkdir(self.input_path)
  52. os.mkdir(self.output_path)
  53. os.mkdir(self.keys_path)
  54. os.mkdir(self.cache_path)
  55. with open(self.exclude_file_path, 'wb') as fd:
  56. fd.write(b'input/file2\n# A commment line, then a blank line\n\n')
  57. self._old_wd = os.getcwd()
  58. os.chdir(self.tmpdir)
  59. def tearDown(self):
  60. shutil.rmtree(self.tmpdir)
  61. os.chdir(self._old_wd)
  62. def attic(self, *args, **kw):
  63. exit_code = kw.get('exit_code', 0)
  64. fork = kw.get('fork', False)
  65. if fork:
  66. try:
  67. output = subprocess.check_output((sys.executable, '-m', 'attic.archiver') + args)
  68. ret = 0
  69. except subprocess.CalledProcessError as e:
  70. output = e.output
  71. ret = e.returncode
  72. output = os.fsdecode(output)
  73. if ret != exit_code:
  74. print(output)
  75. self.assert_equal(exit_code, ret)
  76. return output
  77. args = list(args)
  78. stdout, stderr = sys.stdout, sys.stderr
  79. try:
  80. output = StringIO()
  81. sys.stdout = sys.stderr = output
  82. ret = self.archiver.run(args)
  83. sys.stdout, sys.stderr = stdout, stderr
  84. if ret != exit_code:
  85. print(output.getvalue())
  86. self.assert_equal(exit_code, ret)
  87. return output.getvalue()
  88. finally:
  89. sys.stdout, sys.stderr = stdout, stderr
  90. def create_src_archive(self, name):
  91. self.attic('create', self.repository_location + '::' + name, src_dir)
  92. class ArchiverTestCase(ArchiverTestCaseBase):
  93. def create_regular_file(self, name, size=0, contents=None):
  94. filename = os.path.join(self.input_path, name)
  95. if not os.path.exists(os.path.dirname(filename)):
  96. os.makedirs(os.path.dirname(filename))
  97. with open(filename, 'wb') as fd:
  98. if contents is None:
  99. contents = b'X' * size
  100. fd.write(contents)
  101. def create_test_files(self):
  102. """Create a minimal test case including all supported file types
  103. """
  104. # File
  105. self.create_regular_file('empty', size=0)
  106. # 2600-01-01 > 2**64 ns
  107. os.utime('input/empty', (19880895600, 19880895600))
  108. self.create_regular_file('file1', size=1024 * 80)
  109. self.create_regular_file('flagfile', size=1024)
  110. # Directory
  111. self.create_regular_file('dir2/file2', size=1024 * 80)
  112. # File owner
  113. os.chown('input/file1', 100, 200)
  114. # File mode
  115. os.chmod('input/file1', 0o7755)
  116. os.chmod('input/dir2', 0o555)
  117. # Block device
  118. os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20))
  119. # Char device
  120. os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40))
  121. # Hard link
  122. os.link(os.path.join(self.input_path, 'file1'),
  123. os.path.join(self.input_path, 'hardlink'))
  124. # Symlink
  125. os.symlink('somewhere', os.path.join(self.input_path, 'link1'))
  126. if xattr.is_enabled(self.input_path):
  127. xattr.setxattr(os.path.join(self.input_path, 'file1'), 'user.foo', b'bar')
  128. # XXX this always fails for me
  129. # ubuntu 14.04, on a TMP dir filesystem with user_xattr, using fakeroot
  130. # same for newer ubuntu and centos.
  131. # if this is supported just on specific platform, platform should be checked first,
  132. # so that the test setup for all tests using it does not fail here always for others.
  133. #xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False)
  134. # FIFO node
  135. os.mkfifo(os.path.join(self.input_path, 'fifo1'))
  136. if has_lchflags:
  137. os.lchflags(os.path.join(self.input_path, 'flagfile'), stat.UF_NODUMP)
  138. def test_basic_functionality(self):
  139. self.create_test_files()
  140. self.attic('init', self.repository_location)
  141. self.attic('create', self.repository_location + '::test', 'input')
  142. self.attic('create', self.repository_location + '::test.2', 'input')
  143. with changedir('output'):
  144. self.attic('extract', self.repository_location + '::test')
  145. self.assert_equal(len(self.attic('list', self.repository_location).splitlines()), 2)
  146. self.assert_equal(len(self.attic('list', self.repository_location + '::test').splitlines()), 11)
  147. self.assert_dirs_equal('input', 'output/input')
  148. info_output = self.attic('info', self.repository_location + '::test')
  149. self.assert_in('Number of files: 4', info_output)
  150. shutil.rmtree(self.cache_path)
  151. info_output2 = self.attic('info', self.repository_location + '::test')
  152. # info_output2 starts with some "initializing cache" text but should
  153. # end the same way as info_output
  154. assert info_output2.endswith(info_output)
  155. def test_strip_components(self):
  156. self.attic('init', self.repository_location)
  157. self.create_regular_file('dir/file')
  158. self.attic('create', self.repository_location + '::test', 'input')
  159. with changedir('output'):
  160. self.attic('extract', self.repository_location + '::test', '--strip-components', '3')
  161. self.assert_true(not os.path.exists('file'))
  162. with self.assert_creates_file('file'):
  163. self.attic('extract', self.repository_location + '::test', '--strip-components', '2')
  164. with self.assert_creates_file('dir/file'):
  165. self.attic('extract', self.repository_location + '::test', '--strip-components', '1')
  166. with self.assert_creates_file('input/dir/file'):
  167. self.attic('extract', self.repository_location + '::test', '--strip-components', '0')
  168. def test_extract_include_exclude(self):
  169. self.attic('init', self.repository_location)
  170. self.create_regular_file('file1', size=1024 * 80)
  171. self.create_regular_file('file2', size=1024 * 80)
  172. self.create_regular_file('file3', size=1024 * 80)
  173. self.create_regular_file('file4', size=1024 * 80)
  174. self.attic('create', '--exclude=input/file4', self.repository_location + '::test', 'input')
  175. with changedir('output'):
  176. self.attic('extract', self.repository_location + '::test', 'input/file1', )
  177. self.assert_equal(sorted(os.listdir('output/input')), ['file1'])
  178. with changedir('output'):
  179. self.attic('extract', '--exclude=input/file2', self.repository_location + '::test')
  180. self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file3'])
  181. with changedir('output'):
  182. self.attic('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
  183. self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file3'])
  184. def test_exclude_caches(self):
  185. self.attic('init', self.repository_location)
  186. self.create_regular_file('file1', size=1024 * 80)
  187. self.create_regular_file('cache1/CACHEDIR.TAG', contents=b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff')
  188. self.create_regular_file('cache2/CACHEDIR.TAG', contents=b'invalid signature')
  189. self.attic('create', '--exclude-caches', self.repository_location + '::test', 'input')
  190. with changedir('output'):
  191. self.attic('extract', self.repository_location + '::test')
  192. self.assert_equal(sorted(os.listdir('output/input')), ['cache2', 'file1'])
  193. self.assert_equal(sorted(os.listdir('output/input/cache2')), ['CACHEDIR.TAG'])
  194. def test_path_normalization(self):
  195. self.attic('init', self.repository_location)
  196. self.create_regular_file('dir1/dir2/file', size=1024 * 80)
  197. with changedir('input/dir1/dir2'):
  198. self.attic('create', self.repository_location + '::test', '../../../input/dir1/../dir1/dir2/..')
  199. output = self.attic('list', self.repository_location + '::test')
  200. self.assert_not_in('..', output)
  201. self.assert_in(' input/dir1/dir2/file', output)
  202. def test_exclude_normalization(self):
  203. self.attic('init', self.repository_location)
  204. self.create_regular_file('file1', size=1024 * 80)
  205. self.create_regular_file('file2', size=1024 * 80)
  206. with changedir('input'):
  207. self.attic('create', '--exclude=file1', self.repository_location + '::test1', '.')
  208. with changedir('output'):
  209. self.attic('extract', self.repository_location + '::test1')
  210. self.assert_equal(sorted(os.listdir('output')), ['file2'])
  211. with changedir('input'):
  212. self.attic('create', '--exclude=./file1', self.repository_location + '::test2', '.')
  213. with changedir('output'):
  214. self.attic('extract', self.repository_location + '::test2')
  215. self.assert_equal(sorted(os.listdir('output')), ['file2'])
  216. self.attic('create', '--exclude=input/./file1', self.repository_location + '::test3', 'input')
  217. with changedir('output'):
  218. self.attic('extract', self.repository_location + '::test3')
  219. self.assert_equal(sorted(os.listdir('output/input')), ['file2'])
  220. def test_repeated_files(self):
  221. self.create_regular_file('file1', size=1024 * 80)
  222. self.attic('init', self.repository_location)
  223. self.attic('create', self.repository_location + '::test', 'input', 'input')
  224. def test_overwrite(self):
  225. self.create_regular_file('file1', size=1024 * 80)
  226. self.create_regular_file('dir2/file2', size=1024 * 80)
  227. self.attic('init', self.repository_location)
  228. self.attic('create', self.repository_location + '::test', 'input')
  229. # Overwriting regular files and directories should be supported
  230. os.mkdir('output/input')
  231. os.mkdir('output/input/file1')
  232. os.mkdir('output/input/dir2')
  233. with changedir('output'):
  234. self.attic('extract', self.repository_location + '::test')
  235. self.assert_dirs_equal('input', 'output/input')
  236. # But non-empty dirs should fail
  237. os.unlink('output/input/file1')
  238. os.mkdir('output/input/file1')
  239. os.mkdir('output/input/file1/dir')
  240. with changedir('output'):
  241. self.attic('extract', self.repository_location + '::test', exit_code=1)
  242. def test_delete(self):
  243. self.create_regular_file('file1', size=1024 * 80)
  244. self.create_regular_file('dir2/file2', size=1024 * 80)
  245. self.attic('init', self.repository_location)
  246. self.attic('create', self.repository_location + '::test', 'input')
  247. self.attic('create', self.repository_location + '::test.2', 'input')
  248. self.attic('extract', '--dry-run', self.repository_location + '::test')
  249. self.attic('extract', '--dry-run', self.repository_location + '::test.2')
  250. self.attic('delete', self.repository_location + '::test')
  251. self.attic('extract', '--dry-run', self.repository_location + '::test.2')
  252. self.attic('delete', self.repository_location + '::test.2')
  253. # Make sure all data except the manifest has been deleted
  254. repository = Repository(self.repository_path)
  255. self.assert_equal(len(repository), 1)
  256. def test_corrupted_repository(self):
  257. self.attic('init', self.repository_location)
  258. self.create_src_archive('test')
  259. self.attic('extract', '--dry-run', self.repository_location + '::test')
  260. self.attic('check', self.repository_location)
  261. name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0]
  262. with open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+') as fd:
  263. fd.seek(100)
  264. fd.write('XXXX')
  265. self.attic('check', self.repository_location, exit_code=1)
  266. def test_readonly_repository(self):
  267. self.attic('init', self.repository_location)
  268. self.create_src_archive('test')
  269. os.system('chmod -R ugo-w ' + self.repository_path)
  270. try:
  271. self.attic('extract', '--dry-run', self.repository_location + '::test')
  272. finally:
  273. # Restore permissions so shutil.rmtree is able to delete it
  274. os.system('chmod -R u+w ' + self.repository_path)
  275. def test_cmdline_compatibility(self):
  276. self.create_regular_file('file1', size=1024 * 80)
  277. self.attic('init', self.repository_location)
  278. self.attic('create', self.repository_location + '::test', 'input')
  279. output = self.attic('verify', '-v', self.repository_location + '::test')
  280. self.assert_in('"attic verify" has been deprecated', output)
  281. output = self.attic('prune', self.repository_location, '--hourly=1')
  282. self.assert_in('"--hourly" has been deprecated. Use "--keep-hourly" instead', output)
  283. def test_prune_repository(self):
  284. self.attic('init', self.repository_location)
  285. self.attic('create', self.repository_location + '::test1', src_dir)
  286. self.attic('create', self.repository_location + '::test2', src_dir)
  287. output = self.attic('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2')
  288. self.assert_in('Keeping archive: test2', output)
  289. self.assert_in('Would prune: test1', output)
  290. output = self.attic('list', self.repository_location)
  291. self.assert_in('test1', output)
  292. self.assert_in('test2', output)
  293. self.attic('prune', self.repository_location, '--keep-daily=2')
  294. output = self.attic('list', self.repository_location)
  295. self.assert_not_in('test1', output)
  296. self.assert_in('test2', output)
  297. def test_usage(self):
  298. self.assert_raises(SystemExit, lambda: self.attic())
  299. self.assert_raises(SystemExit, lambda: self.attic('-h'))
  300. @unittest.skipUnless(has_llfuse, 'llfuse not installed')
  301. def test_fuse_mount_repository(self):
  302. mountpoint = os.path.join(self.tmpdir, 'mountpoint')
  303. os.mkdir(mountpoint)
  304. self.attic('init', self.repository_location)
  305. self.create_test_files()
  306. self.attic('create', self.repository_location + '::archive', 'input')
  307. self.attic('create', self.repository_location + '::archive2', 'input')
  308. try:
  309. self.attic('mount', self.repository_location, mountpoint, fork=True)
  310. self.wait_for_mount(mountpoint)
  311. self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input'))
  312. self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input'))
  313. finally:
  314. if sys.platform.startswith('linux'):
  315. os.system('fusermount -u ' + mountpoint)
  316. else:
  317. os.system('umount ' + mountpoint)
  318. os.rmdir(mountpoint)
  319. # Give the daemon some time to exit
  320. time.sleep(.2)
  321. @unittest.skipUnless(has_llfuse, 'llfuse not installed')
  322. def test_fuse_mount_archive(self):
  323. mountpoint = os.path.join(self.tmpdir, 'mountpoint')
  324. os.mkdir(mountpoint)
  325. self.attic('init', self.repository_location)
  326. self.create_test_files()
  327. self.attic('create', self.repository_location + '::archive', 'input')
  328. try:
  329. self.attic('mount', self.repository_location + '::archive', mountpoint, fork=True)
  330. self.wait_for_mount(mountpoint)
  331. self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
  332. finally:
  333. if sys.platform.startswith('linux'):
  334. os.system('fusermount -u ' + mountpoint)
  335. else:
  336. os.system('umount ' + mountpoint)
  337. os.rmdir(mountpoint)
  338. # Give the daemon some time to exit
  339. time.sleep(.2)
  340. def verify_aes_counter_uniqueness(self, method):
  341. seen = set() # Chunks already seen
  342. used = set() # counter values already used
  343. def verify_uniqueness():
  344. repository = Repository(self.repository_path)
  345. for key, _ in repository.open_index(repository.get_transaction_id()).iteritems():
  346. data = repository.get(key)
  347. hash = sha256(data).digest()
  348. if hash not in seen:
  349. seen.add(hash)
  350. mac, meta, data = parser(data)
  351. num_blocks = num_aes_blocks(len(data))
  352. nonce = bytes_to_long(meta.stored_iv)
  353. for counter in range(nonce, nonce + num_blocks):
  354. self.assert_not_in(counter, used)
  355. used.add(counter)
  356. self.create_test_files()
  357. os.environ['ATTIC_PASSPHRASE'] = 'passphrase'
  358. self.attic('init', '--encryption=' + method, self.repository_location)
  359. verify_uniqueness()
  360. self.attic('create', self.repository_location + '::test', 'input')
  361. verify_uniqueness()
  362. self.attic('create', self.repository_location + '::test.2', 'input')
  363. verify_uniqueness()
  364. self.attic('delete', self.repository_location + '::test.2')
  365. verify_uniqueness()
  366. self.assert_equal(used, set(range(len(used))))
  367. def test_aes_counter_uniqueness_keyfile(self):
  368. self.verify_aes_counter_uniqueness('keyfile')
  369. def test_aes_counter_uniqueness_passphrase(self):
  370. self.verify_aes_counter_uniqueness('passphrase')
  371. class ArchiverCheckTestCase(ArchiverTestCaseBase):
  372. def setUp(self):
  373. super(ArchiverCheckTestCase, self).setUp()
  374. with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10):
  375. self.attic('init', self.repository_location)
  376. self.create_src_archive('archive1')
  377. self.create_src_archive('archive2')
  378. def open_archive(self, name):
  379. repository = Repository(self.repository_path)
  380. manifest, key = Manifest.load(repository)
  381. archive = Archive(repository, key, manifest, name)
  382. return archive, repository
  383. def test_check_usage(self):
  384. output = self.attic('check', self.repository_location, exit_code=0)
  385. self.assert_in('Starting repository check', output)
  386. self.assert_in('Starting archive consistency check', output)
  387. output = self.attic('check', '--repository-only', self.repository_location, exit_code=0)
  388. self.assert_in('Starting repository check', output)
  389. self.assert_not_in('Starting archive consistency check', output)
  390. output = self.attic('check', '--archives-only', self.repository_location, exit_code=0)
  391. self.assert_not_in('Starting repository check', output)
  392. self.assert_in('Starting archive consistency check', output)
  393. def test_missing_file_chunk(self):
  394. archive, repository = self.open_archive('archive1')
  395. for item in archive.iter_items():
  396. if item[b'path'].endswith('testsuite/archiver.py'):
  397. repository.delete(item[b'chunks'][-1][0])
  398. break
  399. repository.commit()
  400. self.attic('check', self.repository_location, exit_code=1)
  401. self.attic('check', '--repair', self.repository_location, exit_code=0)
  402. self.attic('check', self.repository_location, exit_code=0)
  403. def test_missing_archive_item_chunk(self):
  404. archive, repository = self.open_archive('archive1')
  405. repository.delete(archive.metadata[b'items'][-5])
  406. repository.commit()
  407. self.attic('check', self.repository_location, exit_code=1)
  408. self.attic('check', '--repair', self.repository_location, exit_code=0)
  409. self.attic('check', self.repository_location, exit_code=0)
  410. def test_missing_archive_metadata(self):
  411. archive, repository = self.open_archive('archive1')
  412. repository.delete(archive.id)
  413. repository.commit()
  414. self.attic('check', self.repository_location, exit_code=1)
  415. self.attic('check', '--repair', self.repository_location, exit_code=0)
  416. self.attic('check', self.repository_location, exit_code=0)
  417. def test_missing_manifest(self):
  418. archive, repository = self.open_archive('archive1')
  419. repository.delete(Manifest.MANIFEST_ID)
  420. repository.commit()
  421. self.attic('check', self.repository_location, exit_code=1)
  422. output = self.attic('check', '--repair', self.repository_location, exit_code=0)
  423. self.assert_in('archive1', output)
  424. self.assert_in('archive2', output)
  425. self.attic('check', self.repository_location, exit_code=0)
  426. def test_extra_chunks(self):
  427. self.attic('check', self.repository_location, exit_code=0)
  428. repository = Repository(self.repository_location)
  429. repository.put(b'01234567890123456789012345678901', b'xxxx')
  430. repository.commit()
  431. repository.close()
  432. self.attic('check', self.repository_location, exit_code=1)
  433. self.attic('check', self.repository_location, exit_code=1)
  434. self.attic('check', '--repair', self.repository_location, exit_code=0)
  435. self.attic('check', self.repository_location, exit_code=0)
  436. self.attic('extract', '--dry-run', self.repository_location + '::archive1', exit_code=0)
  437. class RemoteArchiverTestCase(ArchiverTestCase):
  438. prefix = '__testsuite__:'
  439. def test_remote_repo_restrict_to_path(self):
  440. self.attic('init', self.repository_location)
  441. path_prefix = os.path.dirname(self.repository_path)
  442. with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo']):
  443. self.assert_raises(PathNotAllowed, lambda: self.attic('init', self.repository_location + '_1'))
  444. with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', path_prefix]):
  445. self.attic('init', self.repository_location + '_2')
  446. with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo', '--restrict-to-path', path_prefix]):
  447. self.attic('init', self.repository_location + '_3')