Przeglądaj źródła

Merge pull request #1911 from enkore/f/itemnt

Work on metadata handling speed
enkore 9 lat temu
rodzic
commit
7e5ed40e2f

+ 6 - 1
setup.py

@@ -50,6 +50,7 @@ compress_source = 'src/borg/compress.pyx'
 crypto_source = 'src/borg/crypto.pyx'
 chunker_source = 'src/borg/chunker.pyx'
 hashindex_source = 'src/borg/hashindex.pyx'
+item_source = 'src/borg/item.pyx'
 platform_posix_source = 'src/borg/platform/posix.pyx'
 platform_linux_source = 'src/borg/platform/linux.pyx'
 platform_darwin_source = 'src/borg/platform/darwin.pyx'
@@ -60,6 +61,7 @@ cython_sources = [
     crypto_source,
     chunker_source,
     hashindex_source,
+    item_source,
 
     platform_posix_source,
     platform_linux_source,
@@ -83,6 +85,7 @@ try:
                 'src/borg/crypto.c',
                 'src/borg/chunker.c', 'src/borg/_chunker.c',
                 'src/borg/hashindex.c', 'src/borg/_hashindex.c',
+                'src/borg/item.c',
                 'src/borg/platform/posix.c',
                 'src/borg/platform/linux.c',
                 'src/borg/platform/freebsd.c',
@@ -99,6 +102,7 @@ except ImportError:
     crypto_source = crypto_source.replace('.pyx', '.c')
     chunker_source = chunker_source.replace('.pyx', '.c')
     hashindex_source = hashindex_source.replace('.pyx', '.c')
+    item_source = item_source.replace('.pyx', '.c')
     platform_posix_source = platform_posix_source.replace('.pyx', '.c')
     platform_linux_source = platform_linux_source.replace('.pyx', '.c')
     platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
@@ -358,7 +362,8 @@ if not on_rtd:
     Extension('borg.compress', [compress_source], libraries=['lz4'], include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
     Extension('borg.crypto', [crypto_source], libraries=crypto_libraries, include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
     Extension('borg.chunker', [chunker_source]),
-    Extension('borg.hashindex', [hashindex_source])
+    Extension('borg.hashindex', [hashindex_source]),
+    Extension('borg.item', [item_source]),
 ]
     if sys.platform.startswith(('linux', 'freebsd', 'darwin')):
         ext_modules.append(Extension('borg.platform.posix', [platform_posix_source]))

+ 24 - 22
src/borg/archive.py

@@ -29,12 +29,11 @@ from .helpers import Error, IntegrityError
 from .helpers import uid2user, user2uid, gid2group, group2gid
 from .helpers import parse_timestamp, to_localtime
 from .helpers import format_time, format_timedelta, format_file_size, file_status
-from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates, swidth_slice
-from .helpers import decode_dict, StableDict
-from .helpers import int_to_bigint, bigint_to_int, bin_to_hex
+from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates
+from .helpers import StableDict
+from .helpers import bin_to_hex
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
 from .helpers import PathPrefixPattern, FnmatchPattern
-from .helpers import consume, chunkit
 from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
 from .item import Item, ArchiveItem
 from .key import key_factory
@@ -125,19 +124,22 @@ class BackupOSError(Exception):
         return str(self.os_error)
 
 
-@contextmanager
-def backup_io():
-    """Context manager changing OSError to BackupOSError."""
-    try:
-        yield
-    except OSError as os_error:
-        raise BackupOSError(os_error) from os_error
+class BackupIO:
+    def __enter__(self):
+        pass
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if exc_type and issubclass(exc_type, OSError):
+            raise BackupOSError(exc_val) from exc_val
+
+
+backup_io = BackupIO()
 
 
 def backup_io_iter(iterator):
     while True:
         try:
-            with backup_io():
+            with backup_io:
                 item = next(iterator)
         except StopIteration:
             return
@@ -475,13 +477,13 @@ Number of files: {0.stats.nfiles}'''.format(
             pass
         mode = item.mode
         if stat.S_ISREG(mode):
-            with backup_io():
+            with backup_io:
                 if not os.path.exists(os.path.dirname(path)):
                     os.makedirs(os.path.dirname(path))
             # Hard link?
             if 'source' in item:
                 source = os.path.join(dest, *item.source.split(os.sep)[stripped_components:])
-                with backup_io():
+                with backup_io:
                     if os.path.exists(path):
                         os.unlink(path)
                     if item.source not in hardlink_masters:
@@ -490,24 +492,24 @@ Number of files: {0.stats.nfiles}'''.format(
                 item.chunks, link_target = hardlink_masters[item.source]
                 if link_target:
                     # Hard link was extracted previously, just link
-                    with backup_io():
+                    with backup_io:
                         os.link(link_target, path)
                     return
                 # Extract chunks, since the item which had the chunks was not extracted
-            with backup_io():
+            with backup_io:
                 fd = open(path, 'wb')
             with fd:
                 ids = [c.id for c in item.chunks]
                 for _, data in self.pipeline.fetch_many(ids, is_preloaded=True):
                     if pi:
                         pi.show(increase=len(data), info=[remove_surrogates(item.path)])
-                    with backup_io():
+                    with backup_io:
                         if sparse and self.zeros.startswith(data):
                             # all-zero chunk: create a hole in a sparse file
                             fd.seek(len(data), 1)
                         else:
                             fd.write(data)
-                with backup_io():
+                with backup_io:
                     pos = fd.tell()
                     fd.truncate(pos)
                     fd.flush()
@@ -519,7 +521,7 @@ Number of files: {0.stats.nfiles}'''.format(
                 # Update master entry with extracted file path, so that following hardlinks don't extract twice.
                 hardlink_masters[item.get('source') or original_path] = (None, path)
             return
-        with backup_io():
+        with backup_io:
             # No repository access beyond this point.
             if stat.S_ISDIR(mode):
                 if not os.path.exists(path):
@@ -705,7 +707,7 @@ Number of files: {0.stats.nfiles}'''.format(
 
     def stat_ext_attrs(self, st, path):
         attrs = {}
-        with backup_io():
+        with backup_io:
             xattrs = xattr.get_all(path, follow_symlinks=False)
             bsdflags = get_flags(path, st)
             acl_get(path, attrs, st, self.numeric_owner)
@@ -742,7 +744,7 @@ Number of files: {0.stats.nfiles}'''.format(
             return 'b'  # block device
 
     def process_symlink(self, path, st):
-        with backup_io():
+        with backup_io:
             source = os.readlink(path)
         item = Item(path=make_path_safe(path), source=source)
         item.update(self.stat_attrs(st, path))
@@ -854,7 +856,7 @@ Number of files: {0.stats.nfiles}'''.format(
         else:
             compress = self.compression_decider1.decide(path)
             self.file_compression_logger.debug('%s -> compression %s', path, compress['name'])
-            with backup_io():
+            with backup_io:
                 fh = Archive._open_rb(path)
             with os.fdopen(fh, 'rb') as fd:
                 self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd, fh)), compress=compress)

+ 1 - 1
src/borg/archiver.py

@@ -24,7 +24,7 @@ logger = create_logger()
 from . import __version__
 from . import helpers
 from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
-from .archive import BackupOSError, CHUNKER_PARAMS
+from .archive import BackupOSError
 from .cache import Cache
 from .constants import *  # NOQA
 from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR

+ 4 - 4
src/borg/cache.py

@@ -15,7 +15,7 @@ from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Location
 from .helpers import Error
 from .helpers import get_cache_dir, get_security_dir
-from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex
+from .helpers import bin_to_hex
 from .helpers import format_file_size
 from .helpers import yes
 from .helpers import remove_surrogates
@@ -350,7 +350,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                     # this is to avoid issues with filesystem snapshots and mtime granularity.
                     # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
                     entry = FileCacheEntry(*msgpack.unpackb(item))
-                    if entry.age == 0 and bigint_to_int(entry.mtime) < self._newest_mtime or \
+                    if entry.age == 0 and entry.mtime < self._newest_mtime or \
                        entry.age > 0 and entry.age < ttl:
                         msgpack.pack((path_hash, entry), fd)
         pi.output('Saving cache config')
@@ -567,7 +567,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         if not entry:
             return None
         entry = FileCacheEntry(*msgpack.unpackb(entry))
-        if (entry.size == st.st_size and bigint_to_int(entry.mtime) == st.st_mtime_ns and
+        if (entry.size == st.st_size and entry.mtime == st.st_mtime_ns and
                 (ignore_inode or entry.inode == st.st_ino)):
             self.files[path_hash] = msgpack.packb(entry._replace(age=0))
             return entry.chunk_ids
@@ -577,6 +577,6 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
     def memorize_file(self, path_hash, st, ids):
         if not (self.do_files and stat.S_ISREG(st.st_mode)):
             return
-        entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=int_to_bigint(st.st_mtime_ns), chunk_ids=ids)
+        entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=st.st_mtime_ns, chunk_ids=ids)
         self.files[path_hash] = msgpack.packb(entry)
         self._newest_mtime = max(self._newest_mtime or 0, st.st_mtime_ns)

+ 4 - 20
src/borg/helpers.py

@@ -86,7 +86,7 @@ class PlaceholderError(Error):
 
 
 def check_extension_modules():
-    from . import platform, compress
+    from . import platform, compress, item
     if hashindex.API_VERSION != 4:
         raise ExtensionModuleError
     if chunker.API_VERSION != 2:
@@ -97,6 +97,8 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION != 5:
         raise ExtensionModuleError
+    if item.API_VERSION != 1:
+        raise ExtensionModuleError
 
 
 ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
@@ -691,7 +693,7 @@ def SortBySpec(text):
 
 def safe_timestamp(item_timestamp_ns):
     try:
-        return datetime.fromtimestamp(bigint_to_int(item_timestamp_ns) / 1e9)
+        return datetime.fromtimestamp(item_timestamp_ns / 1e9)
     except OverflowError:
         # likely a broken file time and datetime did not want to go beyond year 9999
         return datetime(9999, 12, 31, 23, 59, 59)
@@ -1090,24 +1092,6 @@ class StableDict(dict):
         return sorted(super().items())
 
 
-def bigint_to_int(mtime):
-    """Convert bytearray to int
-    """
-    if isinstance(mtime, bytes):
-        return int.from_bytes(mtime, 'little', signed=True)
-    return mtime
-
-
-def int_to_bigint(value):
-    """Convert integers larger than 64 bits to bytearray
-
-    Smaller integers are left alone
-    """
-    if value.bit_length() > 63:
-        return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True)
-    return value
-
-
 def is_slow_msgpack():
     return msgpack.Packer is msgpack.fallback.Packer
 

+ 5 - 4
src/borg/item.py → src/borg/item.pyx

@@ -1,8 +1,9 @@
 from .constants import ITEM_KEYS
 from .helpers import safe_encode, safe_decode
-from .helpers import bigint_to_int, int_to_bigint
 from .helpers import StableDict
 
+API_VERSION = 1
+
 
 class PropDict:
     """
@@ -151,9 +152,9 @@ class Item(PropDict):
     rdev = PropDict._make_property('rdev', int)
     bsdflags = PropDict._make_property('bsdflags', int)
 
-    atime = PropDict._make_property('atime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
-    ctime = PropDict._make_property('ctime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
-    mtime = PropDict._make_property('mtime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
+    atime = PropDict._make_property('atime', int)
+    ctime = PropDict._make_property('ctime', int)
+    mtime = PropDict._make_property('mtime', int)
 
     hardlink_master = PropDict._make_property('hardlink_master', bool)
 

+ 1 - 1
src/borg/key.py

@@ -14,7 +14,7 @@ logger = create_logger()
 
 from .constants import *  # NOQA
 from .compress import Compressor, get_compressor
-from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks, hmac_sha256, blake2b_256
+from .crypto import AES, bytes_to_long, bytes_to_int, num_aes_blocks, hmac_sha256, blake2b_256
 from .helpers import Chunk
 from .helpers import Error, IntegrityError
 from .helpers import yes

+ 1 - 1
src/borg/testsuite/archive.py

@@ -220,7 +220,7 @@ def test_key_length_msgpacked_items():
 
 def test_backup_io():
     with pytest.raises(BackupOSError):
-        with backup_io():
+        with backup_io:
             raise OSError(123)
 
 

+ 2 - 14
src/borg/testsuite/helpers.py

@@ -18,7 +18,7 @@ from ..helpers import prune_within, prune_split
 from ..helpers import get_cache_dir, get_keys_dir, get_security_dir
 from ..helpers import is_slow_msgpack
 from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH
-from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex
+from ..helpers import StableDict, bin_to_hex
 from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams, Chunk
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import load_excludes
@@ -27,19 +27,7 @@ from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPat
 from ..helpers import swidth_slice
 from ..helpers import chunkit
 
-from . import BaseTestCase, environment_variable, FakeInputs
-
-
-class BigIntTestCase(BaseTestCase):
-
-    def test_bigint(self):
-        self.assert_equal(int_to_bigint(0), 0)
-        self.assert_equal(int_to_bigint(2**63-1), 2**63-1)
-        self.assert_equal(int_to_bigint(-2**63+1), -2**63+1)
-        self.assert_equal(int_to_bigint(2**63), b'\x00\x00\x00\x00\x00\x00\x00\x80\x00')
-        self.assert_equal(int_to_bigint(-2**63), b'\x00\x00\x00\x00\x00\x00\x00\x80\xff')
-        self.assert_equal(bigint_to_int(int_to_bigint(-2**70)), -2**70)
-        self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70)
+from . import BaseTestCase, FakeInputs
 
 
 def test_bin_to_hex():

+ 0 - 11
src/borg/testsuite/item.py

@@ -77,17 +77,6 @@ def test_item_int_property():
         item.mode = "invalid"
 
 
-def test_item_bigint_property():
-    item = Item()
-    small, big = 42, 2 ** 65
-    item.atime = small
-    assert item.atime == small
-    assert item.as_dict() == {'atime': small}
-    item.atime = big
-    assert item.atime == big
-    assert item.as_dict() == {'atime': b'\0' * 8 + b'\x02'}
-
-
 def test_item_user_group_none():
     item = Item()
     item.user = None