Merge pull request #1911 from enkore/f/itemnt

Work on metadata handling speed
enkore, 8 years ago
parent commit 7e5ed40e2f

+ 6 - 1
setup.py

@@ -50,6 +50,7 @@ compress_source = 'src/borg/compress.pyx'
 crypto_source = 'src/borg/crypto.pyx'
 chunker_source = 'src/borg/chunker.pyx'
 hashindex_source = 'src/borg/hashindex.pyx'
+item_source = 'src/borg/item.pyx'
 platform_posix_source = 'src/borg/platform/posix.pyx'
 platform_linux_source = 'src/borg/platform/linux.pyx'
 platform_darwin_source = 'src/borg/platform/darwin.pyx'
@@ -60,6 +61,7 @@ cython_sources = [
     crypto_source,
     chunker_source,
     hashindex_source,
+    item_source,

     platform_posix_source,
     platform_linux_source,
@@ -83,6 +85,7 @@ try:
                 'src/borg/crypto.c',
                 'src/borg/chunker.c', 'src/borg/_chunker.c',
                 'src/borg/hashindex.c', 'src/borg/_hashindex.c',
+                'src/borg/item.c',
                 'src/borg/platform/posix.c',
                 'src/borg/platform/linux.c',
                 'src/borg/platform/freebsd.c',
@@ -99,6 +102,7 @@ except ImportError:
     crypto_source = crypto_source.replace('.pyx', '.c')
     chunker_source = chunker_source.replace('.pyx', '.c')
     hashindex_source = hashindex_source.replace('.pyx', '.c')
+    item_source = item_source.replace('.pyx', '.c')
     platform_posix_source = platform_posix_source.replace('.pyx', '.c')
     platform_linux_source = platform_linux_source.replace('.pyx', '.c')
     platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
@@ -358,7 +362,8 @@ if not on_rtd:
     Extension('borg.compress', [compress_source], libraries=['lz4'], include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
     Extension('borg.crypto', [crypto_source], libraries=crypto_libraries, include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
     Extension('borg.chunker', [chunker_source]),
-    Extension('borg.hashindex', [hashindex_source])
+    Extension('borg.hashindex', [hashindex_source]),
+    Extension('borg.item', [item_source]),
 ]
     if sys.platform.startswith(('linux', 'freebsd', 'darwin')):
         ext_modules.append(Extension('borg.platform.posix', [platform_posix_source]))
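A note on the build wiring: item.pyx joins the same optional-Cython fallback as the other extension modules — build from the .pyx when Cython is installed, otherwise compile the pre-generated .c file shipped in the sdist. A condensed, self-contained sketch of that pattern (illustrative only; borg's real setup.py routes this through its own build_ext handling):

```python
# Sketch of the optional-Cython pattern used above: cythonize the .pyx
# when Cython is available, else fall back to the shipped .c file.
from setuptools import setup, Extension

item_source = 'src/borg/item.pyx'

try:
    from Cython.Build import cythonize
except ImportError:
    # No Cython installed: compile the pre-generated C file directly.
    ext_modules = [Extension('borg.item', [item_source.replace('.pyx', '.c')])]
else:
    ext_modules = cythonize([Extension('borg.item', [item_source])])

setup(name='borg-item-demo', ext_modules=ext_modules)
```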

+ 24 - 22
src/borg/archive.py

@@ -29,12 +29,11 @@ from .helpers import Error, IntegrityError
 from .helpers import uid2user, user2uid, gid2group, group2gid
 from .helpers import parse_timestamp, to_localtime
 from .helpers import format_time, format_timedelta, format_file_size, file_status
-from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates, swidth_slice
-from .helpers import decode_dict, StableDict
-from .helpers import int_to_bigint, bigint_to_int, bin_to_hex
+from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates
+from .helpers import StableDict
+from .helpers import bin_to_hex
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
 from .helpers import PathPrefixPattern, FnmatchPattern
-from .helpers import consume, chunkit
 from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
 from .item import Item, ArchiveItem
 from .key import key_factory
@@ -125,19 +124,22 @@ class BackupOSError(Exception):
         return str(self.os_error)


-@contextmanager
-def backup_io():
-    """Context manager changing OSError to BackupOSError."""
-    try:
-        yield
-    except OSError as os_error:
-        raise BackupOSError(os_error) from os_error
+class BackupIO:
+    def __enter__(self):
+        pass
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if exc_type and issubclass(exc_type, OSError):
+            raise BackupOSError(exc_val) from exc_val
+
+
+backup_io = BackupIO()


 def backup_io_iter(iterator):
     while True:
         try:
-            with backup_io():
+            with backup_io:
                 item = next(iterator)
         except StopIteration:
             return
@@ -475,13 +477,13 @@ Number of files: {0.stats.nfiles}'''.format(
             pass
         mode = item.mode
         if stat.S_ISREG(mode):
-            with backup_io():
+            with backup_io:
                 if not os.path.exists(os.path.dirname(path)):
                     os.makedirs(os.path.dirname(path))
             # Hard link?
             if 'source' in item:
                 source = os.path.join(dest, *item.source.split(os.sep)[stripped_components:])
-                with backup_io():
+                with backup_io:
                     if os.path.exists(path):
                         os.unlink(path)
                     if item.source not in hardlink_masters:
@@ -490,24 +492,24 @@ Number of files: {0.stats.nfiles}'''.format(
                 item.chunks, link_target = hardlink_masters[item.source]
                 if link_target:
                     # Hard link was extracted previously, just link
-                    with backup_io():
+                    with backup_io:
                         os.link(link_target, path)
                     return
                 # Extract chunks, since the item which had the chunks was not extracted
-            with backup_io():
+            with backup_io:
                 fd = open(path, 'wb')
             with fd:
                 ids = [c.id for c in item.chunks]
                 for _, data in self.pipeline.fetch_many(ids, is_preloaded=True):
                     if pi:
                         pi.show(increase=len(data), info=[remove_surrogates(item.path)])
-                    with backup_io():
+                    with backup_io:
                         if sparse and self.zeros.startswith(data):
                             # all-zero chunk: create a hole in a sparse file
                             fd.seek(len(data), 1)
                         else:
                             fd.write(data)
-                with backup_io():
+                with backup_io:
                     pos = fd.tell()
                     fd.truncate(pos)
                     fd.flush()
@@ -519,7 +521,7 @@ Number of files: {0.stats.nfiles}'''.format(
                 # Update master entry with extracted file path, so that following hardlinks don't extract twice.
                 hardlink_masters[item.get('source') or original_path] = (None, path)
             return
-        with backup_io():
+        with backup_io:
             # No repository access beyond this point.
             if stat.S_ISDIR(mode):
                 if not os.path.exists(path):
@@ -705,7 +707,7 @@ Number of files: {0.stats.nfiles}'''.format(

     def stat_ext_attrs(self, st, path):
         attrs = {}
-        with backup_io():
+        with backup_io:
             xattrs = xattr.get_all(path, follow_symlinks=False)
             bsdflags = get_flags(path, st)
             acl_get(path, attrs, st, self.numeric_owner)
@@ -742,7 +744,7 @@ Number of files: {0.stats.nfiles}'''.format(
             return 'b'  # block device

     def process_symlink(self, path, st):
-        with backup_io():
+        with backup_io:
             source = os.readlink(path)
         item = Item(path=make_path_safe(path), source=source)
         item.update(self.stat_attrs(st, path))
@@ -854,7 +856,7 @@ Number of files: {0.stats.nfiles}'''.format(
         else:
             compress = self.compression_decider1.decide(path)
             self.file_compression_logger.debug('%s -> compression %s', path, compress['name'])
-            with backup_io():
+            with backup_io:
                 fh = Archive._open_rb(path)
             with os.fdopen(fh, 'rb') as fd:
                 self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd, fh)), compress=compress)
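The backup_io rework above is the core of the speedup in this file: the @contextmanager version allocated a fresh generator plus a _GeneratorContextManager wrapper on every single use, and backup_io guards nearly every filesystem call during create/extract. A single module-level instance of a plain __enter__/__exit__ class makes each `with backup_io:` almost free. A self-contained before/after sketch (BackupOSError reduced to its essentials):

```python
import os
from contextlib import contextmanager


class BackupOSError(Exception):
    """Wrapper for OSError raised while accessing backup source files."""
    def __init__(self, os_error):
        self.os_error = os_error


# Before: each "with backup_io():" call builds a new generator plus a
# _GeneratorContextManager wrapper around it.
@contextmanager
def backup_io_old():
    try:
        yield
    except OSError as os_error:
        raise BackupOSError(os_error) from os_error


# After: one plain object, created once at import time and reused by
# every "with backup_io:" -- no per-use allocations.
class BackupIO:
    def __enter__(self):
        pass

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type and issubclass(exc_type, OSError):
            raise BackupOSError(exc_val) from exc_val


backup_io = BackupIO()

# Both forms behave identically at the call site:
with backup_io:
    os.stat('.')
```

Since __exit__ returns None unless it raises, non-OSError exceptions still propagate unchanged, matching the old behavior.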

+ 1 - 1
src/borg/archiver.py

@@ -24,7 +24,7 @@ logger = create_logger()
 from . import __version__
 from . import helpers
 from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
-from .archive import BackupOSError, CHUNKER_PARAMS
+from .archive import BackupOSError
 from .cache import Cache
 from .constants import *  # NOQA
 from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR

+ 4 - 4
src/borg/cache.py

@@ -15,7 +15,7 @@ from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Location
 from .helpers import Error
 from .helpers import get_cache_dir, get_security_dir
-from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex
+from .helpers import bin_to_hex
 from .helpers import format_file_size
 from .helpers import yes
 from .helpers import remove_surrogates
@@ -350,7 +350,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                     # this is to avoid issues with filesystem snapshots and mtime granularity.
                     # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
                     entry = FileCacheEntry(*msgpack.unpackb(item))
-                    if entry.age == 0 and bigint_to_int(entry.mtime) < self._newest_mtime or \
+                    if entry.age == 0 and entry.mtime < self._newest_mtime or \
                        entry.age > 0 and entry.age < ttl:
                         msgpack.pack((path_hash, entry), fd)
         pi.output('Saving cache config')
@@ -567,7 +567,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         if not entry:
             return None
         entry = FileCacheEntry(*msgpack.unpackb(entry))
-        if (entry.size == st.st_size and bigint_to_int(entry.mtime) == st.st_mtime_ns and
+        if (entry.size == st.st_size and entry.mtime == st.st_mtime_ns and
                 (ignore_inode or entry.inode == st.st_ino)):
             self.files[path_hash] = msgpack.packb(entry._replace(age=0))
             return entry.chunk_ids
@@ -577,6 +577,6 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
     def memorize_file(self, path_hash, st, ids):
         if not (self.do_files and stat.S_ISREG(st.st_mode)):
             return
-        entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=int_to_bigint(st.st_mtime_ns), chunk_ids=ids)
+        entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=st.st_mtime_ns, chunk_ids=ids)
         self.files[path_hash] = msgpack.packb(entry)
         self._newest_mtime = max(self._newest_mtime or 0, st.st_mtime_ns)
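Dropping the bigint round-trip for mtime works because msgpack natively serializes any integer that fits in 64 bits, and st_mtime_ns stays below 2**63 until around the year 2262; byte-wrapping every timestamp was per-file overhead with no payoff. A quick round-trip check (a sketch: FileCacheEntry here is a stand-in namedtuple, and time.time_ns needs Python 3.7+):

```python
# Sketch: nanosecond timestamps round-trip through msgpack as plain ints,
# making the int_to_bigint/bigint_to_int byte-wrapping unnecessary here.
import time
from collections import namedtuple

import msgpack

FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids')

mtime_ns = time.time_ns()          # stays below 2**63 until ~2262
assert mtime_ns.bit_length() <= 63

entry = FileCacheEntry(age=0, inode=1234, size=4096, mtime=mtime_ns, chunk_ids=[])
packed = msgpack.packb(entry)      # namedtuples pack as plain arrays
assert FileCacheEntry(*msgpack.unpackb(packed)).mtime == mtime_ns
```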

+ 4 - 20
src/borg/helpers.py

@@ -86,7 +86,7 @@ class PlaceholderError(Error):


 def check_extension_modules():
-    from . import platform, compress
+    from . import platform, compress, item
     if hashindex.API_VERSION != 4:
         raise ExtensionModuleError
     if chunker.API_VERSION != 2:
@@ -97,6 +97,8 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION != 5:
         raise ExtensionModuleError
+    if item.API_VERSION != 1:
+        raise ExtensionModuleError


 ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
@@ -691,7 +693,7 @@ def SortBySpec(text):

 def safe_timestamp(item_timestamp_ns):
     try:
-        return datetime.fromtimestamp(bigint_to_int(item_timestamp_ns) / 1e9)
+        return datetime.fromtimestamp(item_timestamp_ns / 1e9)
     except OverflowError:
         # likely a broken file time and datetime did not want to go beyond year 9999
         return datetime(9999, 12, 31, 23, 59, 59)
@@ -1090,24 +1092,6 @@ class StableDict(dict):
         return sorted(super().items())


-def bigint_to_int(mtime):
-    """Convert bytearray to int
-    """
-    if isinstance(mtime, bytes):
-        return int.from_bytes(mtime, 'little', signed=True)
-    return mtime
-
-
-def int_to_bigint(value):
-    """Convert integers larger than 64 bits to bytearray
-
-    Smaller integers are left alone
-    """
-    if value.bit_length() > 63:
-        return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True)
-    return value
-
-
 def is_slow_msgpack():
     return msgpack.Packer is msgpack.fallback.Packer

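For reference, the deleted helpers only ever byte-encoded values wider than 63 bits and passed everything else through; the boundary assertions below are lifted from the BigIntTestCase that this PR also removes:

```python
# The removed helpers, reproduced from the diff above; they only wrapped
# integers wider than 63 bits into little-endian signed bytes.
def int_to_bigint(value):
    if value.bit_length() > 63:
        return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True)
    return value


def bigint_to_int(mtime):
    if isinstance(mtime, bytes):
        return int.from_bytes(mtime, 'little', signed=True)
    return mtime


assert int_to_bigint(2**63 - 1) == 2**63 - 1              # passes through
assert int_to_bigint(2**63) == b'\x00' * 7 + b'\x80\x00'  # wrapped to bytes
assert bigint_to_int(int_to_bigint(2**70)) == 2**70       # round-trips
```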

+ 5 - 4
src/borg/item.py → src/borg/item.pyx

@@ -1,8 +1,9 @@
 from .constants import ITEM_KEYS
 from .helpers import safe_encode, safe_decode
-from .helpers import bigint_to_int, int_to_bigint
 from .helpers import StableDict

+API_VERSION = 1
+

 class PropDict:
     """
@@ -151,9 +152,9 @@ class Item(PropDict):
     rdev = PropDict._make_property('rdev', int)
     bsdflags = PropDict._make_property('bsdflags', int)

-    atime = PropDict._make_property('atime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
-    ctime = PropDict._make_property('ctime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
-    mtime = PropDict._make_property('mtime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
+    atime = PropDict._make_property('atime', int)
+    ctime = PropDict._make_property('ctime', int)
+    mtime = PropDict._make_property('mtime', int)

     hardlink_master = PropDict._make_property('hardlink_master', bool)

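With the 'bigint' codec dropped, the timestamp properties now store and return plain ints with no encode/decode step at all. A minimal sketch of how a _make_property-style typed property behaves (simplified; the real PropDict also validates keys against ITEM_KEYS and takes a value-type name for error messages):

```python
# Minimal sketch of a _make_property-style typed property, assuming the
# real PropDict keeps values in self._dict and type-checks on assignment.
class PropDict:
    def __init__(self, **kw):
        self._dict = dict(kw)

    @classmethod
    def _make_property(cls, key, value_type, encode=None, decode=None):
        def _get(self):
            value = self._dict[key]
            return decode(value) if decode else value

        def _set(self, value):
            if not isinstance(value, value_type):
                raise TypeError('%s must be %s' % (key, value_type.__name__))
            self._dict[key] = encode(value) if encode else value

        return property(_get, _set)


class Item(PropDict):
    mtime = PropDict._make_property('mtime', int)


item = Item()
item.mtime = 1_500_000_000_000_000_000   # plain int now, no bigint round-trip
assert item.mtime == 1_500_000_000_000_000_000
```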

+ 1 - 1
src/borg/key.py

@@ -14,7 +14,7 @@ logger = create_logger()

 from .constants import *  # NOQA
 from .compress import Compressor, get_compressor
-from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks, hmac_sha256, blake2b_256
+from .crypto import AES, bytes_to_long, bytes_to_int, num_aes_blocks, hmac_sha256, blake2b_256
 from .helpers import Chunk
 from .helpers import Error, IntegrityError
 from .helpers import yes

+ 1 - 1
src/borg/testsuite/archive.py

@@ -220,7 +220,7 @@ def test_key_length_msgpacked_items():

 def test_backup_io():
     with pytest.raises(BackupOSError):
-        with backup_io():
+        with backup_io:
             raise OSError(123)



+ 2 - 14
src/borg/testsuite/helpers.py

@@ -18,7 +18,7 @@ from ..helpers import prune_within, prune_split
 from ..helpers import get_cache_dir, get_keys_dir, get_security_dir
 from ..helpers import is_slow_msgpack
 from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH
-from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex
+from ..helpers import StableDict, bin_to_hex
 from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams, Chunk
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import load_excludes
@@ -27,19 +27,7 @@ from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPat
 from ..helpers import swidth_slice
 from ..helpers import chunkit

-from . import BaseTestCase, environment_variable, FakeInputs
-
-
-class BigIntTestCase(BaseTestCase):
-
-    def test_bigint(self):
-        self.assert_equal(int_to_bigint(0), 0)
-        self.assert_equal(int_to_bigint(2**63-1), 2**63-1)
-        self.assert_equal(int_to_bigint(-2**63+1), -2**63+1)
-        self.assert_equal(int_to_bigint(2**63), b'\x00\x00\x00\x00\x00\x00\x00\x80\x00')
-        self.assert_equal(int_to_bigint(-2**63), b'\x00\x00\x00\x00\x00\x00\x00\x80\xff')
-        self.assert_equal(bigint_to_int(int_to_bigint(-2**70)), -2**70)
-        self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70)
+from . import BaseTestCase, FakeInputs


 def test_bin_to_hex():

+ 0 - 11
src/borg/testsuite/item.py

@@ -77,17 +77,6 @@ def test_item_int_property():
         item.mode = "invalid"
         item.mode = "invalid"
 
 
 
 
-def test_item_bigint_property():
-    item = Item()
-    small, big = 42, 2 ** 65
-    item.atime = small
-    assert item.atime == small
-    assert item.as_dict() == {'atime': small}
-    item.atime = big
-    assert item.atime == big
-    assert item.as_dict() == {'atime': b'\0' * 8 + b'\x02'}
-
-
 def test_item_user_group_none():
     item = Item()
     item.user = None