# repository.py
  1. from configparser import ConfigParser
  2. from binascii import hexlify
  3. from itertools import islice
  4. import errno
  5. import logging
  6. logger = logging.getLogger(__name__)
  7. import os
  8. import shutil
  9. import struct
  10. from zlib import crc32
  11. from .helpers import Error, ErrorWithTraceback, IntegrityError, read_msgpack, write_msgpack, unhexlify
  12. from .hashindex import NSIndex
  13. from .locking import UpgradableLock, LockError, LockErrorT
  14. from .lrucache import LRUCache
# Hard upper bound (bytes) for a single segment entry; _read() rejects larger sizes.
MAX_OBJECT_SIZE = 20 * 1024 * 1024
# Magic written at the start of every segment file to identify the format.
MAGIC = b'BORG_SEG'
MAGIC_LEN = len(MAGIC)
# Segment entry tags: object write, object tombstone, transaction commit marker.
TAG_PUT = 0
TAG_DELETE = 1
TAG_COMMIT = 2
  21. class Repository:
  22. """Filesystem based transactional key value store
  23. On disk layout:
  24. dir/README
  25. dir/config
  26. dir/data/<X / SEGMENTS_PER_DIR>/<X>
  27. dir/index.X
  28. dir/hints.X
  29. """
  30. DEFAULT_MAX_SEGMENT_SIZE = 5 * 1024 * 1024
  31. DEFAULT_SEGMENTS_PER_DIR = 10000
  32. class DoesNotExist(Error):
  33. """Repository {} does not exist."""
  34. class AlreadyExists(Error):
  35. """Repository {} already exists."""
  36. class InvalidRepository(Error):
  37. """{} is not a valid repository. Check repo config."""
  38. class CheckNeeded(ErrorWithTraceback):
  39. """Inconsistency detected. Please run "borg check {}"."""
  40. class ObjectNotFound(ErrorWithTraceback):
  41. """Object with key {} not found in repository {}."""
  42. def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True):
  43. self.path = os.path.abspath(path)
  44. self.io = None
  45. self.lock = None
  46. self.index = None
  47. self._active_txn = False
  48. if create:
  49. self.create(self.path)
  50. self.open(self.path, exclusive, lock_wait=lock_wait, lock=lock)
  51. def __del__(self):
  52. self.close()
  53. def __repr__(self):
  54. return '<%s %s>' % (self.__class__.__name__, self.path)
  55. def create(self, path):
  56. """Create a new empty repository at `path`
  57. """
  58. if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)):
  59. raise self.AlreadyExists(path)
  60. if not os.path.exists(path):
  61. os.mkdir(path)
  62. with open(os.path.join(path, 'README'), 'w') as fd:
  63. fd.write('This is a Borg repository\n')
  64. os.mkdir(os.path.join(path, 'data'))
  65. config = ConfigParser(interpolation=None)
  66. config.add_section('repository')
  67. config.set('repository', 'version', '1')
  68. config.set('repository', 'segments_per_dir', str(self.DEFAULT_SEGMENTS_PER_DIR))
  69. config.set('repository', 'max_segment_size', str(self.DEFAULT_MAX_SEGMENT_SIZE))
  70. config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii'))
  71. self.save_config(path, config)
  72. def save_config(self, path, config):
  73. config_path = os.path.join(path, 'config')
  74. with open(config_path, 'w') as fd:
  75. config.write(fd)
  76. def save_key(self, keydata):
  77. assert self.config
  78. keydata = keydata.decode('utf-8') # remote repo: msgpack issue #99, getting bytes
  79. self.config.set('repository', 'key', keydata)
  80. self.save_config(self.path, self.config)
  81. def load_key(self):
  82. keydata = self.config.get('repository', 'key')
  83. return keydata.encode('utf-8') # remote repo: msgpack issue #99, returning bytes
  84. def destroy(self):
  85. """Destroy the repository at `self.path`
  86. """
  87. self.close()
  88. os.remove(os.path.join(self.path, 'config')) # kill config first
  89. shutil.rmtree(self.path)
  90. def get_index_transaction_id(self):
  91. indices = sorted((int(name[6:]) for name in os.listdir(self.path) if name.startswith('index.') and name[6:].isdigit()))
  92. if indices:
  93. return indices[-1]
  94. else:
  95. return None
  96. def get_transaction_id(self):
  97. index_transaction_id = self.get_index_transaction_id()
  98. segments_transaction_id = self.io.get_segments_transaction_id()
  99. if index_transaction_id is not None and segments_transaction_id is None:
  100. raise self.CheckNeeded(self.path)
  101. # Attempt to automatically rebuild index if we crashed between commit
  102. # tag write and index save
  103. if index_transaction_id != segments_transaction_id:
  104. if index_transaction_id is not None and index_transaction_id > segments_transaction_id:
  105. replay_from = None
  106. else:
  107. replay_from = index_transaction_id
  108. self.replay_segments(replay_from, segments_transaction_id)
  109. return self.get_index_transaction_id()
  110. def break_lock(self):
  111. UpgradableLock(os.path.join(self.path, 'lock')).break_lock()
  112. def open(self, path, exclusive, lock_wait=None, lock=True):
  113. self.path = path
  114. if not os.path.isdir(path):
  115. raise self.DoesNotExist(path)
  116. if lock:
  117. self.lock = UpgradableLock(os.path.join(path, 'lock'), exclusive, timeout=lock_wait).acquire()
  118. else:
  119. self.lock = None
  120. self.config = ConfigParser(interpolation=None)
  121. self.config.read(os.path.join(self.path, 'config'))
  122. if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1:
  123. raise self.InvalidRepository(path)
  124. self.max_segment_size = self.config.getint('repository', 'max_segment_size')
  125. self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
  126. self.id = unhexlify(self.config.get('repository', 'id').strip())
  127. self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir)
  128. def close(self):
  129. if self.lock:
  130. if self.io:
  131. self.io.close()
  132. self.io = None
  133. self.lock.release()
  134. self.lock = None
  135. def commit(self):
  136. """Commit transaction
  137. """
  138. self.io.write_commit()
  139. self.compact_segments()
  140. self.write_index()
  141. self.rollback()
  142. def open_index(self, transaction_id):
  143. if transaction_id is None:
  144. return NSIndex()
  145. return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'))
  146. def prepare_txn(self, transaction_id, do_cleanup=True):
  147. self._active_txn = True
  148. try:
  149. self.lock.upgrade()
  150. except (LockError, LockErrorT):
  151. # if upgrading the lock to exclusive fails, we do not have an
  152. # active transaction. this is important for "serve" mode, where
  153. # the repository instance lives on - even if exceptions happened.
  154. self._active_txn = False
  155. raise
  156. if not self.index or transaction_id is None:
  157. self.index = self.open_index(transaction_id)
  158. if transaction_id is None:
  159. self.segments = {} # XXX bad name: usage_count_of_segment_x = self.segments[x]
  160. self.compact = set() # XXX bad name: segments_needing_compaction = self.compact
  161. else:
  162. if do_cleanup:
  163. self.io.cleanup(transaction_id)
  164. hints = read_msgpack(os.path.join(self.path, 'hints.%d' % transaction_id))
  165. if hints[b'version'] != 1:
  166. raise ValueError('Unknown hints file version: %d' % hints['version'])
  167. self.segments = hints[b'segments']
  168. self.compact = set(hints[b'compact'])
  169. def write_index(self):
  170. hints = {b'version': 1,
  171. b'segments': self.segments,
  172. b'compact': list(self.compact)}
  173. transaction_id = self.io.get_segments_transaction_id()
  174. write_msgpack(os.path.join(self.path, 'hints.%d' % transaction_id), hints)
  175. self.index.write(os.path.join(self.path, 'index.tmp'))
  176. os.rename(os.path.join(self.path, 'index.tmp'),
  177. os.path.join(self.path, 'index.%d' % transaction_id))
  178. # Remove old indices
  179. current = '.%d' % transaction_id
  180. for name in os.listdir(self.path):
  181. if not name.startswith('index.') and not name.startswith('hints.'):
  182. continue
  183. if name.endswith(current):
  184. continue
  185. os.unlink(os.path.join(self.path, name))
  186. self.index = None
  187. def compact_segments(self):
  188. """Compact sparse segments by copying data into new segments
  189. """
  190. if not self.compact:
  191. return
  192. index_transaction_id = self.get_index_transaction_id()
  193. segments = self.segments
  194. for segment in sorted(self.compact):
  195. if self.io.segment_exists(segment):
  196. for tag, key, offset, data in self.io.iter_objects(segment, include_data=True):
  197. if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset):
  198. new_segment, offset = self.io.write_put(key, data)
  199. self.index[key] = new_segment, offset
  200. segments.setdefault(new_segment, 0)
  201. segments[new_segment] += 1
  202. segments[segment] -= 1
  203. elif tag == TAG_DELETE:
  204. if index_transaction_id is None or segment > index_transaction_id:
  205. self.io.write_delete(key)
  206. assert segments[segment] == 0
  207. self.io.write_commit()
  208. for segment in sorted(self.compact):
  209. assert self.segments.pop(segment) == 0
  210. self.io.delete_segment(segment)
  211. self.compact = set()
  212. def replay_segments(self, index_transaction_id, segments_transaction_id):
  213. self.prepare_txn(index_transaction_id, do_cleanup=False)
  214. try:
  215. for segment, filename in self.io.segment_iterator():
  216. if index_transaction_id is not None and segment <= index_transaction_id:
  217. continue
  218. if segment > segments_transaction_id:
  219. break
  220. objects = self.io.iter_objects(segment)
  221. self._update_index(segment, objects)
  222. self.write_index()
  223. finally:
  224. self.rollback()
  225. def _update_index(self, segment, objects, report=None):
  226. """some code shared between replay_segments and check"""
  227. self.segments[segment] = 0
  228. for tag, key, offset in objects:
  229. if tag == TAG_PUT:
  230. try:
  231. s, _ = self.index[key]
  232. self.compact.add(s)
  233. self.segments[s] -= 1
  234. except KeyError:
  235. pass
  236. self.index[key] = segment, offset
  237. self.segments[segment] += 1
  238. elif tag == TAG_DELETE:
  239. try:
  240. s, _ = self.index.pop(key)
  241. self.segments[s] -= 1
  242. self.compact.add(s)
  243. except KeyError:
  244. pass
  245. self.compact.add(segment)
  246. elif tag == TAG_COMMIT:
  247. continue
  248. else:
  249. msg = 'Unexpected tag {} in segment {}'.format(tag, segment)
  250. if report is None:
  251. raise self.CheckNeeded(msg)
  252. else:
  253. report(msg)
  254. if self.segments[segment] == 0:
  255. self.compact.add(segment)
  256. def check(self, repair=False):
  257. """Check repository consistency
  258. This method verifies all segment checksums and makes sure
  259. the index is consistent with the data stored in the segments.
  260. """
  261. error_found = False
  262. def report_error(msg):
  263. nonlocal error_found
  264. error_found = True
  265. logger.error(msg)
  266. assert not self._active_txn
  267. try:
  268. transaction_id = self.get_transaction_id()
  269. current_index = self.open_index(transaction_id)
  270. except Exception:
  271. transaction_id = self.io.get_segments_transaction_id()
  272. current_index = None
  273. if transaction_id is None:
  274. transaction_id = self.get_index_transaction_id()
  275. if transaction_id is None:
  276. transaction_id = self.io.get_latest_segment()
  277. if repair:
  278. self.io.cleanup(transaction_id)
  279. segments_transaction_id = self.io.get_segments_transaction_id()
  280. self.prepare_txn(None) # self.index, self.compact, self.segments all empty now!
  281. for segment, filename in self.io.segment_iterator():
  282. if segment > transaction_id:
  283. continue
  284. try:
  285. objects = list(self.io.iter_objects(segment))
  286. except IntegrityError as err:
  287. report_error(str(err))
  288. objects = []
  289. if repair:
  290. self.io.recover_segment(segment, filename)
  291. objects = list(self.io.iter_objects(segment))
  292. self._update_index(segment, objects, report_error)
  293. # self.index, self.segments, self.compact now reflect the state of the segment files up to <transaction_id>
  294. # We might need to add a commit tag if no committed segment is found
  295. if repair and segments_transaction_id is None:
  296. report_error('Adding commit tag to segment {}'.format(transaction_id))
  297. self.io.segment = transaction_id + 1
  298. self.io.write_commit()
  299. if current_index and not repair:
  300. # current_index = "as found on disk"
  301. # self.index = "as rebuilt in-memory from segments"
  302. if len(current_index) != len(self.index):
  303. report_error('Index object count mismatch. {} != {}'.format(len(current_index), len(self.index)))
  304. elif current_index:
  305. for key, value in self.index.iteritems():
  306. if current_index.get(key, (-1, -1)) != value:
  307. report_error('Index mismatch for key {}. {} != {}'.format(key, value, current_index.get(key, (-1, -1))))
  308. if repair:
  309. self.compact_segments()
  310. self.write_index()
  311. self.rollback()
  312. return not error_found or repair
  313. def rollback(self):
  314. """
  315. """
  316. self.index = None
  317. self._active_txn = False
  318. def __len__(self):
  319. if not self.index:
  320. self.index = self.open_index(self.get_transaction_id())
  321. return len(self.index)
  322. def __contains__(self, id):
  323. if not self.index:
  324. self.index = self.open_index(self.get_transaction_id())
  325. return id in self.index
  326. def list(self, limit=None, marker=None):
  327. if not self.index:
  328. self.index = self.open_index(self.get_transaction_id())
  329. return [id_ for id_, _ in islice(self.index.iteritems(marker=marker), limit)]
  330. def get(self, id_):
  331. if not self.index:
  332. self.index = self.open_index(self.get_transaction_id())
  333. try:
  334. segment, offset = self.index[id_]
  335. return self.io.read(segment, offset, id_)
  336. except KeyError:
  337. raise self.ObjectNotFound(id_, self.path)
  338. def get_many(self, ids, is_preloaded=False):
  339. for id_ in ids:
  340. yield self.get(id_)
  341. def put(self, id, data, wait=True):
  342. if not self._active_txn:
  343. self.prepare_txn(self.get_transaction_id())
  344. try:
  345. segment, _ = self.index[id]
  346. self.segments[segment] -= 1
  347. self.compact.add(segment)
  348. segment = self.io.write_delete(id)
  349. self.segments.setdefault(segment, 0)
  350. self.compact.add(segment)
  351. except KeyError:
  352. pass
  353. segment, offset = self.io.write_put(id, data)
  354. self.segments.setdefault(segment, 0)
  355. self.segments[segment] += 1
  356. self.index[id] = segment, offset
  357. def delete(self, id, wait=True):
  358. if not self._active_txn:
  359. self.prepare_txn(self.get_transaction_id())
  360. try:
  361. segment, offset = self.index.pop(id)
  362. except KeyError:
  363. raise self.ObjectNotFound(id, self.path)
  364. self.segments[segment] -= 1
  365. self.compact.add(segment)
  366. segment = self.io.write_delete(id)
  367. self.compact.add(segment)
  368. self.segments.setdefault(segment, 0)
  369. def preload(self, ids):
  370. """Preload objects (only applies to remote repositories)
  371. """
class LoggedIO:
    """Append-only segment file I/O layer used by Repository.

    Entry layout on disk: crc32 (4 bytes) | size (4 bytes) | tag (1 byte)
    [| 32 byte key for PUT/DELETE] [| payload for PUT]; the crc covers
    everything after the crc field itself (see _read()).
    """
    # generic entry header: crc32 (I), total entry size (I), tag (B)
    header_fmt = struct.Struct('<IIB')
    assert header_fmt.size == 9
    # PUT/DELETE entry header: crc32, size, tag plus the 32 byte object key
    put_header_fmt = struct.Struct('<IIB32s')
    assert put_header_fmt.size == 41
    # size + tag only, i.e. the header portion that is covered by the crc
    header_no_crc_fmt = struct.Struct('<IB')
    assert header_no_crc_fmt.size == 5
    crc_fmt = struct.Struct('<I')
    assert crc_fmt.size == 4

    _commit = header_no_crc_fmt.pack(9, TAG_COMMIT)
    # fully serialized COMMIT entry (crc + size + tag), 9 bytes total
    COMMIT = crc_fmt.pack(crc32(_commit)) + _commit

    def __init__(self, path, limit, segments_per_dir, capacity=90):
        # path: repository base dir; limit: max segment size before rollover;
        # capacity: max number of read fds kept open in the LRU cache
        self.path = path
        self.fds = LRUCache(capacity,
                            dispose=lambda fd: fd.close())
        self.segment = 0
        self.limit = limit
        self.segments_per_dir = segments_per_dir
        self.offset = 0
        self._write_fd = None

    def close(self):
        # finish the current write segment and close all cached read fds
        self.close_segment()
        self.fds.clear()
        self.fds = None  # Just to make sure we're disabled

    def segment_iterator(self, reverse=False):
        # yield (segment_number, filename) for all segment files, numerically
        # sorted; only all-digit dir/file names are considered
        data_path = os.path.join(self.path, 'data')
        dirs = sorted((dir for dir in os.listdir(data_path) if dir.isdigit()), key=int, reverse=reverse)
        for dir in dirs:
            filenames = os.listdir(os.path.join(data_path, dir))
            sorted_filenames = sorted((filename for filename in filenames
                                       if filename.isdigit()), key=int, reverse=reverse)
            for filename in sorted_filenames:
                yield int(filename), os.path.join(data_path, dir, filename)

    def get_latest_segment(self):
        # highest existing segment number, or None if there are no segments
        for segment, filename in self.segment_iterator(reverse=True):
            return segment
        return None

    def get_segments_transaction_id(self):
        """Verify that the transaction id is consistent with the index transaction id
        """
        # scan backwards for the newest segment ending in a COMMIT tag
        for segment, filename in self.segment_iterator(reverse=True):
            if self.is_committed_segment(filename):
                return segment
        return None

    def cleanup(self, transaction_id):
        """Delete segment files left by aborted transactions
        """
        self.segment = transaction_id + 1
        for segment, filename in self.segment_iterator(reverse=True):
            if segment > transaction_id:
                os.unlink(filename)
            else:
                break

    def is_committed_segment(self, filename):
        """Check if segment ends with a COMMIT_TAG tag
        """
        with open(filename, 'rb') as fd:
            try:
                # seek to where a trailing COMMIT entry (9 bytes) would start
                fd.seek(-self.header_fmt.size, os.SEEK_END)
            except OSError as e:
                # return False if segment file is empty or too small
                if e.errno == errno.EINVAL:
                    return False
                raise e
            return fd.read(self.header_fmt.size) == self.COMMIT

    def segment_filename(self, segment):
        # segments are grouped into numbered subdirectories of data/
        return os.path.join(self.path, 'data', str(segment // self.segments_per_dir), str(segment))

    def get_write_fd(self, no_new=False):
        # return the current write fd, rolling over to a new segment file
        # when the size limit is exceeded (unless no_new is set)
        if not no_new and self.offset and self.offset > self.limit:
            self.close_segment()
        if not self._write_fd:
            if self.segment % self.segments_per_dir == 0:
                dirname = os.path.join(self.path, 'data', str(self.segment // self.segments_per_dir))
                if not os.path.exists(dirname):
                    os.mkdir(dirname)
            self._write_fd = open(self.segment_filename(self.segment), 'ab')
            self._write_fd.write(MAGIC)
            self.offset = MAGIC_LEN
        return self._write_fd

    def get_fd(self, segment):
        # cached read fd for `segment`; LRU eviction closes the oldest ones
        try:
            return self.fds[segment]
        except KeyError:
            fd = open(self.segment_filename(segment), 'rb')
            self.fds[segment] = fd
            return fd

    def delete_segment(self, segment):
        # drop the cached fd first so the file is not held open
        if segment in self.fds:
            del self.fds[segment]
        try:
            os.unlink(self.segment_filename(segment))
        except OSError:
            # best effort: the file may already be gone
            pass

    def segment_exists(self, segment):
        return os.path.exists(self.segment_filename(segment))

    def iter_objects(self, segment, include_data=False):
        """Yield (tag, key, offset[, data]) for every entry in `segment`.

        Raises IntegrityError on bad magic, header, size or checksum.
        """
        fd = self.get_fd(segment)
        fd.seek(0)
        if fd.read(MAGIC_LEN) != MAGIC:
            raise IntegrityError('Invalid segment magic [segment {}, offset {}]'.format(segment, 0))
        offset = MAGIC_LEN
        header = fd.read(self.header_fmt.size)
        while header:
            size, tag, key, data = self._read(fd, self.header_fmt, header, segment, offset,
                                              (TAG_PUT, TAG_DELETE, TAG_COMMIT))
            if include_data:
                yield tag, key, offset, data
            else:
                yield tag, key, offset
            offset += size
            header = fd.read(self.header_fmt.size)

    def recover_segment(self, segment, filename):
        """Salvage intact entries from a corrupted segment file.

        The original file is kept as <filename>.beforerecover; the scan
        advances byte by byte until a plausible, crc-valid entry is found.
        """
        if segment in self.fds:
            del self.fds[segment]
        with open(filename, 'rb') as fd:
            data = memoryview(fd.read())
        os.rename(filename, filename + '.beforerecover')
        logger.info('attempting to recover ' + filename)
        with open(filename, 'wb') as fd:
            fd.write(MAGIC)
            while len(data) >= self.header_fmt.size:
                crc, size, tag = self.header_fmt.unpack(data[:self.header_fmt.size])
                if size < self.header_fmt.size or size > len(data):
                    # implausible size: shift the scan window by one byte
                    data = data[1:]
                    continue
                if crc32(data[4:size]) & 0xffffffff != crc:
                    # checksum mismatch: not a real entry start, keep scanning
                    data = data[1:]
                    continue
                fd.write(data[:size])
                data = data[size:]

    def read(self, segment, offset, id):
        """Read and verify the PUT entry for `id` at (segment, offset)."""
        if segment == self.segment and self._write_fd:
            # reading from the segment currently being written: flush first
            self._write_fd.flush()
        fd = self.get_fd(segment)
        fd.seek(offset)
        header = fd.read(self.put_header_fmt.size)
        size, tag, key, data = self._read(fd, self.put_header_fmt, header, segment, offset, (TAG_PUT, ))
        if id != key:
            raise IntegrityError('Invalid segment entry header, is not for wanted id [segment {}, offset {}]'.format(
                segment, offset))
        return data

    def _read(self, fd, fmt, header, segment, offset, acceptable_tags):
        # some code shared by read() and iter_objects()
        try:
            hdr_tuple = fmt.unpack(header)
        except struct.error as err:
            raise IntegrityError('Invalid segment entry header [segment {}, offset {}]: {}'.format(
                segment, offset, err))
        if fmt is self.put_header_fmt:
            crc, size, tag, key = hdr_tuple
        elif fmt is self.header_fmt:
            crc, size, tag = hdr_tuple
            key = None
        else:
            raise TypeError("_read called with unsupported format")
        if size > MAX_OBJECT_SIZE or size < fmt.size:
            raise IntegrityError('Invalid segment entry size [segment {}, offset {}]'.format(
                segment, offset))
        length = size - fmt.size
        data = fd.read(length)
        if len(data) != length:
            raise IntegrityError('Segment entry data short read [segment {}, offset {}]: expected {}, got {} bytes'.format(
                segment, offset, length, len(data)))
        # crc covers everything after the 4 byte crc field itself
        if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
            raise IntegrityError('Segment entry checksum mismatch [segment {}, offset {}]'.format(
                segment, offset))
        if tag not in acceptable_tags:
            raise IntegrityError('Invalid segment entry header, did not get acceptable tag [segment {}, offset {}]'.format(
                segment, offset))
        if key is None and tag in (TAG_PUT, TAG_DELETE):
            # header_fmt variant: the key is the first 32 bytes of the payload
            key, data = data[:32], data[32:]
        return size, tag, key, data

    def write_put(self, id, data):
        """Append a PUT entry; return (segment, offset) of the entry."""
        size = len(data) + self.put_header_fmt.size
        fd = self.get_write_fd()
        offset = self.offset
        header = self.header_no_crc_fmt.pack(size, TAG_PUT)
        crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff)
        fd.write(b''.join((crc, header, id, data)))
        self.offset += size
        return self.segment, offset

    def write_delete(self, id):
        """Append a DELETE (tombstone) entry; return the segment number."""
        fd = self.get_write_fd()
        header = self.header_no_crc_fmt.pack(self.put_header_fmt.size, TAG_DELETE)
        crc = self.crc_fmt.pack(crc32(id, crc32(header)) & 0xffffffff)
        fd.write(b''.join((crc, header, id)))
        self.offset += self.put_header_fmt.size
        return self.segment

    def write_commit(self):
        """Append a COMMIT entry and close the segment, making it durable."""
        fd = self.get_write_fd(no_new=True)
        header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT)
        crc = self.crc_fmt.pack(crc32(header) & 0xffffffff)
        fd.write(b''.join((crc, header)))
        self.close_segment()

    def close_segment(self):
        # flush, fsync and close the current write segment, then advance
        # the segment counter for the next write
        if self._write_fd:
            self.segment += 1
            self.offset = 0
            self._write_fd.flush()
            os.fsync(self._write_fd.fileno())
            if hasattr(os, 'posix_fadvise'):  # python >= 3.3, only on UNIX
                # tell the OS that it does not need to cache what we just wrote,
                # avoids spoiling the cache for the OS and other processes.
                os.posix_fadvise(self._write_fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
            self._write_fd.close()
            self._write_fd = None