Browse Source

move benchmark commands to archiver.benchmarks

Thomas Waldmann 2 years ago
parent
commit
5ea9fb73db
3 changed files with 318 additions and 299 deletions
  1. 1 0
      setup.cfg
  2. 5 299
      src/borg/archiver/__init__.py
  3. 312 0
      src/borg/archiver/benchmarks.py

+ 1 - 0
setup.cfg

@@ -117,6 +117,7 @@ per_file_ignores =
     docs/conf.py:E121,E126,E265,E305,E401,E402
     docs/conf.py:E121,E126,E265,E305,E401,E402
     src/borg/archive.py:E122,E125,E127,E402,E501,F401,F405,W504
     src/borg/archive.py:E122,E125,E127,E402,E501,F401,F405,W504
     src/borg/archiver/__init__.py:E402,E501,E722,E741,F405
     src/borg/archiver/__init__.py:E402,E501,E722,E741,F405
+    src/borg/archiver/benchmarks.py:F405
     src/borg/archiver/common.py:E501,F405
     src/borg/archiver/common.py:E501,F405
     src/borg/archiver/debug.py:F405
     src/borg/archiver/debug.py:F405
     src/borg/archiver/tar.py:F405
     src/borg/archiver/tar.py:F405

+ 5 - 299
src/borg/archiver/__init__.py

@@ -16,14 +16,12 @@ try:
     import os
     import os
     import re
     import re
     import shlex
     import shlex
-    import shutil
     import signal
     import signal
     import stat
     import stat
     import subprocess
     import subprocess
     import textwrap
     import textwrap
     import time
     import time
     from binascii import unhexlify
     from binascii import unhexlify
-    from contextlib import contextmanager
     from datetime import datetime, timedelta
     from datetime import datetime, timedelta
     from io import TextIOWrapper
     from io import TextIOWrapper
 
 
@@ -40,7 +38,7 @@ try:
     from ..cache import Cache, assert_secure, SecurityManager
     from ..cache import Cache, assert_secure, SecurityManager
     from ..constants import *  # NOQA
     from ..constants import *  # NOQA
     from ..compress import CompressionSpec
     from ..compress import CompressionSpec
-    from ..crypto.key import FlexiKey, key_creator, key_argument_names, tam_required_file
+    from ..crypto.key import key_creator, key_argument_names, tam_required_file
     from ..crypto.key import AESOCBRepoKey, CHPORepoKey, Blake2AESOCBRepoKey, Blake2CHPORepoKey
     from ..crypto.key import AESOCBRepoKey, CHPORepoKey, Blake2AESOCBRepoKey, Blake2CHPORepoKey
     from ..crypto.key import AESOCBKeyfileKey, CHPOKeyfileKey, Blake2AESOCBKeyfileKey, Blake2CHPOKeyfileKey
     from ..crypto.key import AESOCBKeyfileKey, CHPOKeyfileKey, Blake2AESOCBKeyfileKey, Blake2CHPOKeyfileKey
     from ..crypto.keymanager import KeyManager
     from ..crypto.keymanager import KeyManager
@@ -71,8 +69,7 @@ try:
     from ..helpers import iter_separated
     from ..helpers import iter_separated
     from ..nanorst import rst_to_terminal
     from ..nanorst import rst_to_terminal
     from ..patterns import PatternMatcher
     from ..patterns import PatternMatcher
-    from ..item import Item
-    from ..platform import get_flags, SyncFile
+    from ..platform import get_flags
     from ..platform import uid2user, gid2group
     from ..platform import uid2user, gid2group
     from ..remote import RepositoryServer, RemoteRepository, cache_if_remote
     from ..remote import RepositoryServer, RemoteRepository, cache_if_remote
     from ..selftest import selftest
     from ..selftest import selftest
@@ -110,11 +107,12 @@ def get_func(args):
     raise Exception("expected func attributes not found")
     raise Exception("expected func attributes not found")
 
 
 
 
+from .benchmarks import BenchmarkMixIn
 from .debug import DebugMixIn
 from .debug import DebugMixIn
 from .tar import TarMixIn
 from .tar import TarMixIn
 
 
 
 
-class Archiver(DebugMixIn, TarMixIn):
+class Archiver(DebugMixIn, TarMixIn, BenchmarkMixIn):
     def __init__(self, lock_wait=None, prog=None):
     def __init__(self, lock_wait=None, prog=None):
         self.exit_code = EXIT_SUCCESS
         self.exit_code = EXIT_SUCCESS
         self.lock_wait = lock_wait
         self.lock_wait = lock_wait
@@ -446,208 +444,6 @@ class Archiver(DebugMixIn, TarMixIn):
             manager.import_keyfile(args)
             manager.import_keyfile(args)
         return EXIT_SUCCESS
         return EXIT_SUCCESS
 
 
-    def do_benchmark_crud(self, args):
-        """Benchmark Create, Read, Update, Delete for archives."""
-
-        def measurement_run(repo, path):
-            compression = "--compression=none"
-            # measure create perf (without files cache to always have it chunking)
-            t_start = time.monotonic()
-            rc = self.do_create(
-                self.parse_args(
-                    [f"--repo={repo}", "create", compression, "--files-cache=disabled", "borg-benchmark-crud1", path]
-                )
-            )
-            t_end = time.monotonic()
-            dt_create = t_end - t_start
-            assert rc == 0
-            # now build files cache
-            rc1 = self.do_create(
-                self.parse_args([f"--repo={repo}", "create", compression, "borg-benchmark-crud2", path])
-            )
-            rc2 = self.do_delete(self.parse_args([f"--repo={repo}", "delete", "-a", "borg-benchmark-crud2"]))
-            assert rc1 == rc2 == 0
-            # measure a no-change update (archive1 is still present)
-            t_start = time.monotonic()
-            rc1 = self.do_create(
-                self.parse_args([f"--repo={repo}", "create", compression, "borg-benchmark-crud3", path])
-            )
-            t_end = time.monotonic()
-            dt_update = t_end - t_start
-            rc2 = self.do_delete(self.parse_args([f"--repo={repo}", "delete", "-a", "borg-benchmark-crud3"]))
-            assert rc1 == rc2 == 0
-            # measure extraction (dry-run: without writing result to disk)
-            t_start = time.monotonic()
-            rc = self.do_extract(self.parse_args([f"--repo={repo}", "extract", "borg-benchmark-crud1", "--dry-run"]))
-            t_end = time.monotonic()
-            dt_extract = t_end - t_start
-            assert rc == 0
-            # measure archive deletion (of LAST present archive with the data)
-            t_start = time.monotonic()
-            rc = self.do_delete(self.parse_args([f"--repo={repo}", "delete", "-a", "borg-benchmark-crud1"]))
-            t_end = time.monotonic()
-            dt_delete = t_end - t_start
-            assert rc == 0
-            return dt_create, dt_update, dt_extract, dt_delete
-
-        @contextmanager
-        def test_files(path, count, size, random):
-            try:
-                path = os.path.join(path, "borg-test-data")
-                os.makedirs(path)
-                z_buff = None if random else memoryview(zeros)[:size] if size <= len(zeros) else b"\0" * size
-                for i in range(count):
-                    fname = os.path.join(path, "file_%d" % i)
-                    data = z_buff if not random else os.urandom(size)
-                    with SyncFile(fname, binary=True) as fd:  # used for posix_fadvise's sake
-                        fd.write(data)
-                yield path
-            finally:
-                shutil.rmtree(path)
-
-        if "_BORG_BENCHMARK_CRUD_TEST" in os.environ:
-            tests = [("Z-TEST", 1, 1, False), ("R-TEST", 1, 1, True)]
-        else:
-            tests = [
-                ("Z-BIG", 10, 100000000, False),
-                ("R-BIG", 10, 100000000, True),
-                ("Z-MEDIUM", 1000, 1000000, False),
-                ("R-MEDIUM", 1000, 1000000, True),
-                ("Z-SMALL", 10000, 10000, False),
-                ("R-SMALL", 10000, 10000, True),
-            ]
-
-        for msg, count, size, random in tests:
-            with test_files(args.path, count, size, random) as path:
-                dt_create, dt_update, dt_extract, dt_delete = measurement_run(args.location.canonical_path(), path)
-            total_size_MB = count * size / 1e06
-            file_size_formatted = format_file_size(size)
-            content = "random" if random else "all-zero"
-            fmt = "%s-%-10s %9.2f MB/s (%d * %s %s files: %.2fs)"
-            print(fmt % ("C", msg, total_size_MB / dt_create, count, file_size_formatted, content, dt_create))
-            print(fmt % ("R", msg, total_size_MB / dt_extract, count, file_size_formatted, content, dt_extract))
-            print(fmt % ("U", msg, total_size_MB / dt_update, count, file_size_formatted, content, dt_update))
-            print(fmt % ("D", msg, total_size_MB / dt_delete, count, file_size_formatted, content, dt_delete))
-
-        return 0
-
-    def do_benchmark_cpu(self, args):
-        """Benchmark CPU bound operations."""
-        from timeit import timeit
-
-        random_10M = os.urandom(10 * 1000 * 1000)
-        key_256 = os.urandom(32)
-        key_128 = os.urandom(16)
-        key_96 = os.urandom(12)
-
-        import io
-        from .chunker import get_chunker
-
-        print("Chunkers =======================================================")
-        size = "1GB"
-
-        def chunkit(chunker_name, *args, **kwargs):
-            with io.BytesIO(random_10M) as data_file:
-                ch = get_chunker(chunker_name, *args, **kwargs)
-                for _ in ch.chunkify(fd=data_file):
-                    pass
-
-        for spec, func in [
-            ("buzhash,19,23,21,4095", lambda: chunkit("buzhash", 19, 23, 21, 4095, seed=0)),
-            ("fixed,1048576", lambda: chunkit("fixed", 1048576, sparse=False)),
-        ]:
-            print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")
-
-        from .checksums import crc32, xxh64
-
-        print("Non-cryptographic checksums / hashes ===========================")
-        size = "1GB"
-        tests = [("xxh64", lambda: xxh64(random_10M)), ("crc32 (zlib)", lambda: crc32(random_10M))]
-        for spec, func in tests:
-            print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")
-
-        from .crypto.low_level import hmac_sha256, blake2b_256
-
-        print("Cryptographic hashes / MACs ====================================")
-        size = "1GB"
-        for spec, func in [
-            ("hmac-sha256", lambda: hmac_sha256(key_256, random_10M)),
-            ("blake2b-256", lambda: blake2b_256(key_256, random_10M)),
-        ]:
-            print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")
-
-        from .crypto.low_level import AES256_CTR_BLAKE2b, AES256_CTR_HMAC_SHA256
-        from .crypto.low_level import AES256_OCB, CHACHA20_POLY1305
-
-        print("Encryption =====================================================")
-        size = "1GB"
-
-        tests = [
-            (
-                "aes-256-ctr-hmac-sha256",
-                lambda: AES256_CTR_HMAC_SHA256(key_256, key_256, iv=key_128, header_len=1, aad_offset=1).encrypt(
-                    random_10M, header=b"X"
-                ),
-            ),
-            (
-                "aes-256-ctr-blake2b",
-                lambda: AES256_CTR_BLAKE2b(key_256 * 4, key_256, iv=key_128, header_len=1, aad_offset=1).encrypt(
-                    random_10M, header=b"X"
-                ),
-            ),
-            (
-                "aes-256-ocb",
-                lambda: AES256_OCB(key_256, iv=key_96, header_len=1, aad_offset=1).encrypt(random_10M, header=b"X"),
-            ),
-            (
-                "chacha20-poly1305",
-                lambda: CHACHA20_POLY1305(key_256, iv=key_96, header_len=1, aad_offset=1).encrypt(
-                    random_10M, header=b"X"
-                ),
-            ),
-        ]
-        for spec, func in tests:
-            print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")
-
-        print("KDFs (slow is GOOD, use argon2!) ===============================")
-        count = 5
-        for spec, func in [
-            ("pbkdf2", lambda: FlexiKey.pbkdf2("mypassphrase", b"salt" * 8, PBKDF2_ITERATIONS, 32)),
-            ("argon2", lambda: FlexiKey.argon2("mypassphrase", 64, b"S" * ARGON2_SALT_BYTES, **ARGON2_ARGS)),
-        ]:
-            print(f"{spec:<24} {count:<10} {timeit(func, number=count):.3f}s")
-
-        from .compress import CompressionSpec
-
-        print("Compression ====================================================")
-        for spec in [
-            "lz4",
-            "zstd,1",
-            "zstd,3",
-            "zstd,5",
-            "zstd,10",
-            "zstd,16",
-            "zstd,22",
-            "zlib,0",
-            "zlib,6",
-            "zlib,9",
-            "lzma,0",
-            "lzma,6",
-            "lzma,9",
-        ]:
-            compressor = CompressionSpec(spec).compressor
-            size = "0.1GB"
-            print(f"{spec:<12} {size:<10} {timeit(lambda: compressor.compress(random_10M), number=10):.3f}s")
-
-        print("msgpack ========================================================")
-        item = Item(path="/foo/bar/baz", mode=660, mtime=1234567)
-        items = [item.as_dict()] * 1000
-        size = "100k Items"
-        spec = "msgpack"
-        print(f"{spec:<12} {size:<10} {timeit(lambda: msgpack.packb(items), number=100):.3f}s")
-
-        return 0
-
     @with_repository(fake="dry_run", exclusive=True, compatibility=(Manifest.Operation.WRITE,))
     @with_repository(fake="dry_run", exclusive=True, compatibility=(Manifest.Operation.WRITE,))
     def do_create(self, args, repository, manifest=None, key=None):
     def do_create(self, args, repository, manifest=None, key=None):
         """Create new archive"""
         """Create new archive"""
@@ -2757,97 +2553,7 @@ class Archiver(DebugMixIn, TarMixIn):
 
 
         subparsers = parser.add_subparsers(title="required arguments", metavar="<command>")
         subparsers = parser.add_subparsers(title="required arguments", metavar="<command>")
 
 
-        # borg benchmark
-        benchmark_epilog = process_epilog("These commands do various benchmarks.")
-
-        subparser = subparsers.add_parser(
-            "benchmark",
-            parents=[mid_common_parser],
-            add_help=False,
-            description="benchmark command",
-            epilog=benchmark_epilog,
-            formatter_class=argparse.RawDescriptionHelpFormatter,
-            help="benchmark command",
-        )
-
-        benchmark_parsers = subparser.add_subparsers(title="required arguments", metavar="<command>")
-        subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser))
-
-        bench_crud_epilog = process_epilog(
-            """
-        This command benchmarks borg CRUD (create, read, update, delete) operations.
-
-        It creates input data below the given PATH and backups this data into the given REPO.
-        The REPO must already exist (it could be a fresh empty repo or an existing repo, the
-        command will create / read / update / delete some archives named borg-benchmark-crud\\* there.
-
-        Make sure you have free space there, you'll need about 1GB each (+ overhead).
-
-        If your repository is encrypted and borg needs a passphrase to unlock the key, use::
-
-            BORG_PASSPHRASE=mysecret borg benchmark crud REPO PATH
-
-        Measurements are done with different input file sizes and counts.
-        The file contents are very artificial (either all zero or all random),
-        thus the measurement results do not necessarily reflect performance with real data.
-        Also, due to the kind of content used, no compression is used in these benchmarks.
-
-        C- == borg create (1st archive creation, no compression, do not use files cache)
-              C-Z- == all-zero files. full dedup, this is primarily measuring reader/chunker/hasher.
-              C-R- == random files. no dedup, measuring throughput through all processing stages.
-
-        R- == borg extract (extract archive, dry-run, do everything, but do not write files to disk)
-              R-Z- == all zero files. Measuring heavily duplicated files.
-              R-R- == random files. No duplication here, measuring throughput through all processing
-              stages, except writing to disk.
-
-        U- == borg create (2nd archive creation of unchanged input files, measure files cache speed)
-              The throughput value is kind of virtual here, it does not actually read the file.
-              U-Z- == needs to check the 2 all-zero chunks' existence in the repo.
-              U-R- == needs to check existence of a lot of different chunks in the repo.
-
-        D- == borg delete archive (delete last remaining archive, measure deletion + compaction)
-              D-Z- == few chunks to delete / few segments to compact/remove.
-              D-R- == many chunks to delete / many segments to compact/remove.
-
-        Please note that there might be quite some variance in these measurements.
-        Try multiple measurements and having a otherwise idle machine (and network, if you use it).
-        """
-        )
-        subparser = benchmark_parsers.add_parser(
-            "crud",
-            parents=[common_parser],
-            add_help=False,
-            description=self.do_benchmark_crud.__doc__,
-            epilog=bench_crud_epilog,
-            formatter_class=argparse.RawDescriptionHelpFormatter,
-            help="benchmarks borg CRUD (create, extract, update, delete).",
-        )
-        subparser.set_defaults(func=self.do_benchmark_crud)
-
-        subparser.add_argument("path", metavar="PATH", help="path were to create benchmark input data")
-
-        bench_cpu_epilog = process_epilog(
-            """
-        This command benchmarks misc. CPU bound borg operations.
-
-        It creates input data in memory, runs the operation and then displays throughput.
-        To reduce outside influence on the timings, please make sure to run this with:
-
-        - an otherwise as idle as possible machine
-        - enough free memory so there will be no slow down due to paging activity
-        """
-        )
-        subparser = benchmark_parsers.add_parser(
-            "cpu",
-            parents=[common_parser],
-            add_help=False,
-            description=self.do_benchmark_cpu.__doc__,
-            epilog=bench_cpu_epilog,
-            formatter_class=argparse.RawDescriptionHelpFormatter,
-            help="benchmarks borg CPU bound operations.",
-        )
-        subparser.set_defaults(func=self.do_benchmark_cpu)
+        self.build_parser_benchmarks(subparsers, common_parser, mid_common_parser)
 
 
         # borg break-lock
         # borg break-lock
         break_lock_epilog = process_epilog(
         break_lock_epilog = process_epilog(

+ 312 - 0
src/borg/archiver/benchmarks.py

@@ -0,0 +1,312 @@
+import argparse
+from contextlib import contextmanager
+import functools
+import os
+import shutil
+import time
+
+from ..constants import *  # NOQA
+from ..crypto.key import FlexiKey
+from ..helpers import format_file_size
+from ..helpers import msgpack
+from ..item import Item
+from ..platform import SyncFile
+
+
+class BenchmarkMixIn:
+    def do_benchmark_crud(self, args):
+        """Benchmark Create, Read, Update, Delete for archives."""
+
+        def measurement_run(repo, path):
+            compression = "--compression=none"
+            # measure create perf (without files cache to always have it chunking)
+            t_start = time.monotonic()
+            rc = self.do_create(
+                self.parse_args(
+                    [f"--repo={repo}", "create", compression, "--files-cache=disabled", "borg-benchmark-crud1", path]
+                )
+            )
+            t_end = time.monotonic()
+            dt_create = t_end - t_start
+            assert rc == 0
+            # now build files cache
+            rc1 = self.do_create(
+                self.parse_args([f"--repo={repo}", "create", compression, "borg-benchmark-crud2", path])
+            )
+            rc2 = self.do_delete(self.parse_args([f"--repo={repo}", "delete", "-a", "borg-benchmark-crud2"]))
+            assert rc1 == rc2 == 0
+            # measure a no-change update (archive1 is still present)
+            t_start = time.monotonic()
+            rc1 = self.do_create(
+                self.parse_args([f"--repo={repo}", "create", compression, "borg-benchmark-crud3", path])
+            )
+            t_end = time.monotonic()
+            dt_update = t_end - t_start
+            rc2 = self.do_delete(self.parse_args([f"--repo={repo}", "delete", "-a", "borg-benchmark-crud3"]))
+            assert rc1 == rc2 == 0
+            # measure extraction (dry-run: without writing result to disk)
+            t_start = time.monotonic()
+            rc = self.do_extract(self.parse_args([f"--repo={repo}", "extract", "borg-benchmark-crud1", "--dry-run"]))
+            t_end = time.monotonic()
+            dt_extract = t_end - t_start
+            assert rc == 0
+            # measure archive deletion (of LAST present archive with the data)
+            t_start = time.monotonic()
+            rc = self.do_delete(self.parse_args([f"--repo={repo}", "delete", "-a", "borg-benchmark-crud1"]))
+            t_end = time.monotonic()
+            dt_delete = t_end - t_start
+            assert rc == 0
+            return dt_create, dt_update, dt_extract, dt_delete
+
+        @contextmanager
+        def test_files(path, count, size, random):
+            try:
+                path = os.path.join(path, "borg-test-data")
+                os.makedirs(path)
+                z_buff = None if random else memoryview(zeros)[:size] if size <= len(zeros) else b"\0" * size
+                for i in range(count):
+                    fname = os.path.join(path, "file_%d" % i)
+                    data = z_buff if not random else os.urandom(size)
+                    with SyncFile(fname, binary=True) as fd:  # used for posix_fadvise's sake
+                        fd.write(data)
+                yield path
+            finally:
+                shutil.rmtree(path)
+
+        if "_BORG_BENCHMARK_CRUD_TEST" in os.environ:
+            tests = [("Z-TEST", 1, 1, False), ("R-TEST", 1, 1, True)]
+        else:
+            tests = [
+                ("Z-BIG", 10, 100000000, False),
+                ("R-BIG", 10, 100000000, True),
+                ("Z-MEDIUM", 1000, 1000000, False),
+                ("R-MEDIUM", 1000, 1000000, True),
+                ("Z-SMALL", 10000, 10000, False),
+                ("R-SMALL", 10000, 10000, True),
+            ]
+
+        for msg, count, size, random in tests:
+            with test_files(args.path, count, size, random) as path:
+                dt_create, dt_update, dt_extract, dt_delete = measurement_run(args.location.canonical_path(), path)
+            total_size_MB = count * size / 1e06
+            file_size_formatted = format_file_size(size)
+            content = "random" if random else "all-zero"
+            fmt = "%s-%-10s %9.2f MB/s (%d * %s %s files: %.2fs)"
+            print(fmt % ("C", msg, total_size_MB / dt_create, count, file_size_formatted, content, dt_create))
+            print(fmt % ("R", msg, total_size_MB / dt_extract, count, file_size_formatted, content, dt_extract))
+            print(fmt % ("U", msg, total_size_MB / dt_update, count, file_size_formatted, content, dt_update))
+            print(fmt % ("D", msg, total_size_MB / dt_delete, count, file_size_formatted, content, dt_delete))
+
+        return 0
+
+    def do_benchmark_cpu(self, args):
+        """Benchmark CPU bound operations."""
+        from timeit import timeit
+
+        random_10M = os.urandom(10 * 1000 * 1000)
+        key_256 = os.urandom(32)
+        key_128 = os.urandom(16)
+        key_96 = os.urandom(12)
+
+        import io
+        from ..chunker import get_chunker
+
+        print("Chunkers =======================================================")
+        size = "1GB"
+
+        def chunkit(chunker_name, *args, **kwargs):
+            with io.BytesIO(random_10M) as data_file:
+                ch = get_chunker(chunker_name, *args, **kwargs)
+                for _ in ch.chunkify(fd=data_file):
+                    pass
+
+        for spec, func in [
+            ("buzhash,19,23,21,4095", lambda: chunkit("buzhash", 19, 23, 21, 4095, seed=0)),
+            ("fixed,1048576", lambda: chunkit("fixed", 1048576, sparse=False)),
+        ]:
+            print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")
+
+        from ..checksums import crc32, xxh64
+
+        print("Non-cryptographic checksums / hashes ===========================")
+        size = "1GB"
+        tests = [("xxh64", lambda: xxh64(random_10M)), ("crc32 (zlib)", lambda: crc32(random_10M))]
+        for spec, func in tests:
+            print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")
+
+        from ..crypto.low_level import hmac_sha256, blake2b_256
+
+        print("Cryptographic hashes / MACs ====================================")
+        size = "1GB"
+        for spec, func in [
+            ("hmac-sha256", lambda: hmac_sha256(key_256, random_10M)),
+            ("blake2b-256", lambda: blake2b_256(key_256, random_10M)),
+        ]:
+            print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")
+
+        from ..crypto.low_level import AES256_CTR_BLAKE2b, AES256_CTR_HMAC_SHA256
+        from ..crypto.low_level import AES256_OCB, CHACHA20_POLY1305
+
+        print("Encryption =====================================================")
+        size = "1GB"
+
+        tests = [
+            (
+                "aes-256-ctr-hmac-sha256",
+                lambda: AES256_CTR_HMAC_SHA256(key_256, key_256, iv=key_128, header_len=1, aad_offset=1).encrypt(
+                    random_10M, header=b"X"
+                ),
+            ),
+            (
+                "aes-256-ctr-blake2b",
+                lambda: AES256_CTR_BLAKE2b(key_256 * 4, key_256, iv=key_128, header_len=1, aad_offset=1).encrypt(
+                    random_10M, header=b"X"
+                ),
+            ),
+            (
+                "aes-256-ocb",
+                lambda: AES256_OCB(key_256, iv=key_96, header_len=1, aad_offset=1).encrypt(random_10M, header=b"X"),
+            ),
+            (
+                "chacha20-poly1305",
+                lambda: CHACHA20_POLY1305(key_256, iv=key_96, header_len=1, aad_offset=1).encrypt(
+                    random_10M, header=b"X"
+                ),
+            ),
+        ]
+        for spec, func in tests:
+            print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")
+
+        print("KDFs (slow is GOOD, use argon2!) ===============================")
+        count = 5
+        for spec, func in [
+            ("pbkdf2", lambda: FlexiKey.pbkdf2("mypassphrase", b"salt" * 8, PBKDF2_ITERATIONS, 32)),
+            ("argon2", lambda: FlexiKey.argon2("mypassphrase", 64, b"S" * ARGON2_SALT_BYTES, **ARGON2_ARGS)),
+        ]:
+            print(f"{spec:<24} {count:<10} {timeit(func, number=count):.3f}s")
+
+        from ..compress import CompressionSpec
+
+        print("Compression ====================================================")
+        for spec in [
+            "lz4",
+            "zstd,1",
+            "zstd,3",
+            "zstd,5",
+            "zstd,10",
+            "zstd,16",
+            "zstd,22",
+            "zlib,0",
+            "zlib,6",
+            "zlib,9",
+            "lzma,0",
+            "lzma,6",
+            "lzma,9",
+        ]:
+            compressor = CompressionSpec(spec).compressor
+            size = "0.1GB"
+            print(f"{spec:<12} {size:<10} {timeit(lambda: compressor.compress(random_10M), number=10):.3f}s")
+
+        print("msgpack ========================================================")
+        item = Item(path="/foo/bar/baz", mode=660, mtime=1234567)
+        items = [item.as_dict()] * 1000
+        size = "100k Items"
+        spec = "msgpack"
+        print(f"{spec:<12} {size:<10} {timeit(lambda: msgpack.packb(items), number=100):.3f}s")
+
+        return 0
+
+    def build_parser_benchmarks(self, subparsers, common_parser, mid_common_parser):
+
+        from .common import process_epilog
+
+        benchmark_epilog = process_epilog("These commands do various benchmarks.")
+
+        subparser = subparsers.add_parser(
+            "benchmark",
+            parents=[mid_common_parser],
+            add_help=False,
+            description="benchmark command",
+            epilog=benchmark_epilog,
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            help="benchmark command",
+        )
+
+        benchmark_parsers = subparser.add_subparsers(title="required arguments", metavar="<command>")
+        subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser))
+
+        bench_crud_epilog = process_epilog(
+            """
+        This command benchmarks borg CRUD (create, read, update, delete) operations.
+
+        It creates input data below the given PATH and backs up this data into the given REPO.
+        The REPO must already exist (it could be a fresh empty repo or an existing repo; the
+        command will create / read / update / delete some archives named borg-benchmark-crud\\* there).
+
+        Make sure you have free space there, you'll need about 1GB each (+ overhead).
+
+        If your repository is encrypted and borg needs a passphrase to unlock the key, use::
+
+            BORG_PASSPHRASE=mysecret borg benchmark crud REPO PATH
+
+        Measurements are done with different input file sizes and counts.
+        The file contents are very artificial (either all zero or all random),
+        thus the measurement results do not necessarily reflect performance with real data.
+        Also, due to the kind of content used, no compression is used in these benchmarks.
+
+        C- == borg create (1st archive creation, no compression, do not use files cache)
+              C-Z- == all-zero files. full dedup, this is primarily measuring reader/chunker/hasher.
+              C-R- == random files. no dedup, measuring throughput through all processing stages.
+
+        R- == borg extract (extract archive, dry-run, do everything, but do not write files to disk)
+              R-Z- == all zero files. Measuring heavily duplicated files.
+              R-R- == random files. No duplication here, measuring throughput through all processing
+              stages, except writing to disk.
+
+        U- == borg create (2nd archive creation of unchanged input files, measure files cache speed)
+              The throughput value is kind of virtual here, it does not actually read the file.
+              U-Z- == needs to check the 2 all-zero chunks' existence in the repo.
+              U-R- == needs to check existence of a lot of different chunks in the repo.
+
+        D- == borg delete archive (delete last remaining archive, measure deletion + compaction)
+              D-Z- == few chunks to delete / few segments to compact/remove.
+              D-R- == many chunks to delete / many segments to compact/remove.
+
+        Please note that there might be quite some variance in these measurements.
+        Try multiple measurements and have an otherwise idle machine (and network, if you use it).
+        """
+        )
+        subparser = benchmark_parsers.add_parser(
+            "crud",
+            parents=[common_parser],
+            add_help=False,
+            description=self.do_benchmark_crud.__doc__,
+            epilog=bench_crud_epilog,
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            help="benchmarks borg CRUD (create, extract, update, delete).",
+        )
+        subparser.set_defaults(func=self.do_benchmark_crud)
+
+        subparser.add_argument("path", metavar="PATH", help="path where to create benchmark input data")
+
+        bench_cpu_epilog = process_epilog(
+            """
+        This command benchmarks misc. CPU bound borg operations.
+
+        It creates input data in memory, runs the operation and then displays throughput.
+        To reduce outside influence on the timings, please make sure to run this with:
+
+        - an otherwise as idle as possible machine
+        - enough free memory so there will be no slow down due to paging activity
+        """
+        )
+        subparser = benchmark_parsers.add_parser(
+            "cpu",
+            parents=[common_parser],
+            add_help=False,
+            description=self.do_benchmark_cpu.__doc__,
+            epilog=bench_cpu_epilog,
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            help="benchmarks borg CPU bound operations.",
+        )
+        subparser.set_defaults(func=self.do_benchmark_cpu)