
Merge pull request #6703 from ThomasWaldmann/r2r-transfer

borg2: "borg transfer" cmd (and also getting rid of legacy)
TW, 3 years ago
commit 86fe8bdd57

+ 0 - 3
docs/faq.rst

@@ -132,9 +132,6 @@ Which file types, attributes, etc. are *not* preserved?
       Archive extraction has optional support to extract all-zero chunks as
       holes in a sparse file.
     * Some filesystem specific attributes, like btrfs NOCOW, see :ref:`platforms`.
-    * For hardlinked symlinks, the hardlinking can not be archived (and thus,
-      the hardlinking will not be done at extraction time). The symlinks will
-      be archived and extracted as non-hardlinked symlinks, see :issue:`2379`.
 
 Are there other known limitations?
 ----------------------------------

+ 1 - 1
docs/internals/data-structures.rst

@@ -567,7 +567,7 @@ dictionary created by the ``Item`` class that contains:
 * uid
 * gid
 * mode (item type + permissions)
-* source (for symlinks, and for hardlinks within one archive)
+* source (for symlinks)
 * rdev (for device files)
 * mtime, atime, ctime in nanoseconds
 * xattrs
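
To make the hardlink-related change above concrete, here is roughly how a pair of hardlinked regular files is represented before and after this pull request (values are made up and only hardlink-relevant keys are shown; hlid is an opaque id derived from the file's (ino, dev) at backup time):

    # borg 1.x: the first hardlink is the "master", later ones are "slaves" pointing at its path
    master = {'path': 'dir/a', 'mode': 0o100644, 'hardlink_master': True, 'chunks': [...]}
    slave = {'path': 'dir/b', 'mode': 0o100644, 'source': 'dir/a'}   # no chunks of its own

    # borg 2: all hardlinks to the same inode carry the same hlid and their own chunks list
    item_a = {'path': 'dir/a', 'mode': 0o100644, 'hlid': b'...', 'chunks': [...]}
    item_b = {'path': 'dir/b', 'mode': 0o100644, 'hlid': b'...', 'chunks': [...]}   # chunks reused, refcounts bumped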

+ 1 - 1
docs/usage/general/file-metadata.rst.inc

@@ -10,7 +10,7 @@ Besides regular file and directory structures, Borg can preserve
   * FIFOs ("named pipes")
   * special file *contents* can be backed up in ``--read-special`` mode.
     By default the metadata to create them with mknod(2), mkfifo(2) etc. is stored.
-* hardlinked regular files, devices, FIFOs (considering all items in the same archive)
+* hardlinked regular files, devices, symlinks, FIFOs (considering all items in the same archive)
 * timestamps in nanosecond precision: mtime, atime, ctime
 * other timestamps: birthtime (on platforms supporting it)
 * permissions:

+ 116 - 198
src/borg/archive.py

@@ -28,7 +28,7 @@ from .constants import *  # NOQA
 from .crypto.low_level import IntegrityError as IntegrityErrorBase
 from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
 from .helpers import Manifest
-from .helpers import hardlinkable
+from .helpers import HardLinkManager
 from .helpers import ChunkIteratorFileWrapper, open_item
 from .helpers import Error, IntegrityError, set_ec
 from .platform import uid2user, user2uid, gid2group, group2gid
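
The HardLinkManager imported here (replacing the old hardlinkable helper and the various hardlink_masters dicts removed throughout this diff) lives in borg.helpers and is not shown in this commit. Judging from its call sites, it is essentially a small typed mapping plus helpers to derive a hardlink id. A minimal sketch under that assumption, not the actual implementation (the borg1_hardlink_master/borg1_hardlink_slave compatibility predicates used by "borg transfer" are omitted):

    import hashlib

    class HardLinkManager:
        """remember/retrieve hardlink info, e.g. (ino, dev) -> chunks, or hlid -> extracted path"""

        def __init__(self, *, id_type, info_type):
            self.id_type = id_type        # e.g. tuple, bytes or str
            self.info_type = info_type    # e.g. list, str or a tuple of accepted types
            self._map = {}

        def remember(self, *, id, info):
            assert isinstance(id, self.id_type)
            self._map[id] = info

        def retrieve(self, id, *, default=None):
            return self._map.get(id, default)

        def hardlink_id_from_path(self, path):
            # stable opaque id derived from an archived path (used when converting borg1 archives)
            return hashlib.sha256(path.encode()).digest()

        def hardlink_id_from_inode(self, *, ino, dev):
            # stable opaque id derived from (ino, dev) of the file being backed up
            return hashlib.sha256(f'{ino}/{dev}'.encode()).digest()
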
@@ -280,7 +280,7 @@ class DownloadPipeline:
         self.repository = repository
         self.key = key
 
-    def unpack_many(self, ids, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
+    def unpack_many(self, ids, *, filter=None, preload=False):
         """
         Return iterator of items.
 
@@ -290,10 +290,7 @@ class DownloadPipeline:
         Warning: if *preload* is True then all data chunks of every yielded item have to be retrieved,
         otherwise preloaded chunks will accumulate in RemoteRepository and create a memory leak.
         """
-        def _preload(chunks):
-            self.repository.preload([c.id for c in chunks])
-
-        masters_preloaded = set()
+        hlids_preloaded = set()
         unpacker = msgpack.Unpacker(use_list=False)
         for data in self.fetch_many(ids):
             unpacker.feed(data)
@@ -306,33 +303,20 @@ class DownloadPipeline:
                 items = [item for item in items if filter(item)]
 
             if preload:
-                if filter and partial_extract:
-                    # if we do only a partial extraction, it gets a bit
-                    # complicated with computing the preload items: if a hardlink master item is not
-                    # selected (== not extracted), we will still need to preload its chunks if a
-                    # corresponding hardlink slave is selected (== is extracted).
-                    # due to a side effect of the filter() call, we now have hardlink_masters dict populated.
-                    for item in items:
-                        if hardlinkable(item.mode):
-                            source = item.get('source')
-                            if source is None:  # maybe a hardlink master
-                                if 'chunks' in item:
-                                    _preload(item.chunks)
-                                # if this is a hl master, remember that we already preloaded all chunks of it (if any):
-                                if item.get('hardlink_master', True):
-                                    masters_preloaded.add(item.path)
-                            else:  # hardlink slave
-                                if source not in masters_preloaded:
-                                    # we only need to preload *once* (for the 1st selected slave)
-                                    chunks, _ = hardlink_masters[source]
-                                    if chunks is not None:
-                                        _preload(chunks)
-                                    masters_preloaded.add(source)
-                else:
-                    # easy: we do not have a filter, thus all items are selected, thus we need to preload all chunks.
-                    for item in items:
-                        if 'chunks' in item:
-                            _preload(item.chunks)
+                for item in items:
+                    if 'chunks' in item:
+                        hlid = item.get('hlid', None)
+                        if hlid is None:
+                            preload_chunks = True
+                        else:
+                            if hlid in hlids_preloaded:
+                                preload_chunks = False
+                            else:
+                                # this hardlinked inode's chunks were not preloaded yet (no other hardlink to the same inode seen so far)
+                                preload_chunks = True
+                                hlids_preloaded.add(hlid)
+                        if preload_chunks:
+                            self.repository.preload([c.id for c in item.chunks])
 
             for item in items:
                 yield item
@@ -443,7 +427,6 @@ class Archive:
         self.repository = repository
         self.cache = cache
         self.manifest = manifest
-        self.hard_links = {}
         self.stats = Statistics(output_json=log_json, iec=iec)
         self.iec = iec
         self.show_progress = progress
@@ -489,7 +472,7 @@ class Archive:
     def _load_meta(self, id):
         data = self.key.decrypt(id, self.repository.get(id))
         metadata = ArchiveItem(internal_dict=msgpack.unpackb(data))
-        if metadata.version != 1:
+        if metadata.version not in (1, 2):  # legacy: still need to read v1 archives
             raise Exception('Unknown archive metadata version')
         return metadata
 
@@ -584,12 +567,10 @@ Utilization of max. archive size: {csize_max:.0%}
             return False
         return filter(item) if filter else True
 
-    def iter_items(self, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
+    def iter_items(self, filter=None, preload=False):
         # note: when calling this with preload=True, later fetch_many() must be called with
         # is_preloaded=True or the RemoteRepository code will leak memory!
-        assert not (filter and partial_extract and preload) or hardlink_masters is not None
-        for item in self.pipeline.unpack_many(self.metadata.items, partial_extract=partial_extract,
-                                              preload=preload, hardlink_masters=hardlink_masters,
+        for item in self.pipeline.unpack_many(self.metadata.items, preload=preload,
                                               filter=lambda item: self.item_filter(item, filter)):
             yield item
 
@@ -620,7 +601,7 @@ Utilization of max. archive size: {csize_max:.0%}
         self.start = start
         self.end = end
         metadata = {
-            'version': 1,
+            'version': 2,
             'name': name,
             'comment': comment or '',
             'items': self.items_buffer.chunks,
@@ -719,33 +700,30 @@ Utilization of max. archive size: {csize_max:.0%}
         return stats
 
     @contextmanager
-    def extract_helper(self, dest, item, path, stripped_components, original_path, hardlink_masters):
+    def extract_helper(self, item, path, hlm, *, dry_run=False):
         hardlink_set = False
         # Hard link?
-        if 'source' in item:
-            source = os.path.join(dest, *item.source.split(os.sep)[stripped_components:])
-            chunks, link_target = hardlink_masters.get(item.source, (None, source))
-            if link_target and has_link:
-                # Hard link was extracted previously, just link
-                with backup_io('link'):
-                    os.link(link_target, path)
-                    hardlink_set = True
-            elif chunks is not None:
-                # assign chunks to this item, since the item which had the chunks was not extracted
-                item.chunks = chunks
+        if 'hlid' in item:
+            link_target = hlm.retrieve(id=item.hlid)
+            if link_target is not None and has_link:
+                if not dry_run:
+                    # another hardlink to same inode (same hlid) was extracted previously, just link to it
+                    with backup_io('link'):
+                        os.link(link_target, path, follow_symlinks=False)
+                hardlink_set = True
         yield hardlink_set
-        if not hardlink_set and hardlink_masters:
-            if has_link:
-                # Update master entry with extracted item path, so that following hardlinks don't extract twice.
+        if not hardlink_set:
+            if 'hlid' in item and has_link:
+                # Update entry with extracted item path, so that following hardlinks don't extract twice.
                 # We have hardlinking support, so we will hardlink not extract.
-                hardlink_masters[item.get('source') or original_path] = (None, path)
+                hlm.remember(id=item.hlid, info=path)
             else:
                 # Broken platform with no hardlinking support.
                 # In this case, we *want* to extract twice, because there is no other way.
                 pass
 
     def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
-                     hardlink_masters=None, stripped_components=0, original_path=None, pi=None):
+                     hlm=None, stripped_components=0, original_path=None, pi=None):
         """
         Extract archive item.
 
@@ -754,29 +732,33 @@ Utilization of max. archive size: {csize_max:.0%}
         :param dry_run: do not write any data
         :param stdout: write extracted data to stdout
         :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
-        :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
+        :param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
         :param stripped_components: stripped leading path components to correct hard link extraction
         :param original_path: 'path' key as stored in archive
         :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
         """
-        hardlink_masters = hardlink_masters or {}
         has_damaged_chunks = 'chunks_healthy' in item
         if dry_run or stdout:
-            if 'chunks' in item:
-                item_chunks_size = 0
-                for data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
-                    if pi:
-                        pi.show(increase=len(data), info=[remove_surrogates(item.path)])
-                    if stdout:
-                        sys.stdout.buffer.write(data)
-                    item_chunks_size += len(data)
-                if stdout:
-                    sys.stdout.buffer.flush()
-                if 'size' in item:
-                    item_size = item.size
-                    if item_size != item_chunks_size:
-                        raise BackupError('Size inconsistency detected: size {}, chunks size {}'.format(
-                                          item_size, item_chunks_size))
+            with self.extract_helper(item, '', hlm, dry_run=dry_run or stdout) as hardlink_set:
+                if not hardlink_set:
+                    # with dry_run, no hardlink is actually created, but we must behave the same as
+                    # in the non-dry_run case regarding fetching preloaded chunks from the pipeline,
+                    # or it would get stuck.
+                    if 'chunks' in item:
+                        item_chunks_size = 0
+                        for data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
+                            if pi:
+                                pi.show(increase=len(data), info=[remove_surrogates(item.path)])
+                            if stdout:
+                                sys.stdout.buffer.write(data)
+                            item_chunks_size += len(data)
+                        if stdout:
+                            sys.stdout.buffer.flush()
+                        if 'size' in item:
+                            item_size = item.size
+                            if item_size != item_chunks_size:
+                                raise BackupError('Size inconsistency detected: size {}, chunks size {}'.format(
+                                                  item_size, item_chunks_size))
             if has_damaged_chunks:
                 raise BackupError('File has damaged (all-zero) chunks. Try running borg check --repair.')
             return
@@ -807,8 +789,7 @@ Utilization of max. archive size: {csize_max:.0%}
         if stat.S_ISREG(mode):
             with backup_io('makedirs'):
                 make_parent(path)
-            with self.extract_helper(dest, item, path, stripped_components, original_path,
-                                     hardlink_masters) as hardlink_set:
+            with self.extract_helper(item, path, hlm) as hardlink_set:
                 if hardlink_set:
                     return
                 with backup_io('open'):
@@ -847,24 +828,26 @@ Utilization of max. archive size: {csize_max:.0%}
                     self.restore_attrs(path, item)
             elif stat.S_ISLNK(mode):
                 make_parent(path)
-                source = item.source
-                try:
-                    os.symlink(source, path)
-                except UnicodeEncodeError:
-                    raise self.IncompatibleFilesystemEncodingError(source, sys.getfilesystemencoding()) from None
-                self.restore_attrs(path, item, symlink=True)
+                with self.extract_helper(item, path, hlm) as hardlink_set:
+                    if hardlink_set:
+                        # unusual, but possible: this is a hardlinked symlink.
+                        return
+                    source = item.source
+                    try:
+                        os.symlink(source, path)
+                    except UnicodeEncodeError:
+                        raise self.IncompatibleFilesystemEncodingError(source, sys.getfilesystemencoding()) from None
+                    self.restore_attrs(path, item, symlink=True)
             elif stat.S_ISFIFO(mode):
                 make_parent(path)
-                with self.extract_helper(dest, item, path, stripped_components, original_path,
-                                         hardlink_masters) as hardlink_set:
+                with self.extract_helper(item, path, hlm) as hardlink_set:
                     if hardlink_set:
                         return
                     os.mkfifo(path)
                     self.restore_attrs(path, item)
             elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
                 make_parent(path)
-                with self.extract_helper(dest, item, path, stripped_components, original_path,
-                                         hardlink_masters) as hardlink_set:
+                with self.extract_helper(item, path, hlm) as hardlink_set:
                     if hardlink_set:
                         return
                     os.mknod(path, item.mode, item.rdev)
@@ -1041,79 +1024,43 @@ Utilization of max. archive size: {csize_max:.0%}
         :param can_compare_chunk_ids: Whether --chunker-params are the same for both archives.
         """
 
-        def hardlink_master_seen(item):
-            return 'source' not in item or not hardlinkable(item.mode) or item.source in hardlink_masters
-
-        def is_hardlink_master(item):
-            return item.get('hardlink_master', True) and 'source' not in item and hardlinkable(item.mode)
-
-        def update_hardlink_masters(item1, item2):
-            if is_hardlink_master(item1) or is_hardlink_master(item2):
-                hardlink_masters[item1.path] = (item1, item2)
-
-        def has_hardlink_master(item, hardlink_masters):
-            return hardlinkable(item.mode) and item.get('source') in hardlink_masters
-
         def compare_items(item1, item2):
-            if has_hardlink_master(item1, hardlink_masters):
-                item1 = hardlink_masters[item1.source][0]
-            if has_hardlink_master(item2, hardlink_masters):
-                item2 = hardlink_masters[item2.source][1]
             return ItemDiff(item1, item2,
                             archive1.pipeline.fetch_many([c.id for c in item1.get('chunks', [])]),
                             archive2.pipeline.fetch_many([c.id for c in item2.get('chunks', [])]),
                             can_compare_chunk_ids=can_compare_chunk_ids)
 
-        def defer_if_necessary(item1, item2):
-            """Adds item tuple to deferred if necessary and returns True, if items were deferred"""
-            update_hardlink_masters(item1, item2)
-            defer = not hardlink_master_seen(item1) or not hardlink_master_seen(item2)
-            if defer:
-                deferred.append((item1, item2))
-            return defer
-
         orphans_archive1 = OrderedDict()
         orphans_archive2 = OrderedDict()
-        deferred = []
-        hardlink_masters = {}
 
         for item1, item2 in zip_longest(
                 archive1.iter_items(lambda item: matcher.match(item.path)),
                 archive2.iter_items(lambda item: matcher.match(item.path)),
         ):
             if item1 and item2 and item1.path == item2.path:
-                if not defer_if_necessary(item1, item2):
-                    yield (item1.path, compare_items(item1, item2))
+                yield (item1.path, compare_items(item1, item2))
                 continue
             if item1:
                 matching_orphan = orphans_archive2.pop(item1.path, None)
                 if matching_orphan:
-                    if not defer_if_necessary(item1, matching_orphan):
-                        yield (item1.path, compare_items(item1, matching_orphan))
+                    yield (item1.path, compare_items(item1, matching_orphan))
                 else:
                     orphans_archive1[item1.path] = item1
             if item2:
                 matching_orphan = orphans_archive1.pop(item2.path, None)
                 if matching_orphan:
-                    if not defer_if_necessary(matching_orphan, item2):
-                        yield (matching_orphan.path, compare_items(matching_orphan, item2))
+                    yield (matching_orphan.path, compare_items(matching_orphan, item2))
                 else:
                     orphans_archive2[item2.path] = item2
         # At this point orphans_* contain items that had no matching partner in the other archive
         for added in orphans_archive2.values():
             path = added.path
             deleted_item = Item.create_deleted(path)
-            update_hardlink_masters(deleted_item, added)
             yield (path, compare_items(deleted_item, added))
         for deleted in orphans_archive1.values():
             path = deleted.path
             deleted_item = Item.create_deleted(path)
-            update_hardlink_masters(deleted, deleted_item)
             yield (path, compare_items(deleted, deleted_item))
-        for item1, item2 in deferred:
-            assert hardlink_master_seen(item1)
-            assert hardlink_master_seen(item2)
-            yield (path, compare_items(item1, item2))
 
 
 class MetadataCollector:
@@ -1289,7 +1236,7 @@ class FilesystemObjectProcessors:
         self.show_progress = show_progress
         self.print_file_status = file_status_printer or (lambda *args: None)
 
-        self.hard_links = {}
+        self.hlm = HardLinkManager(id_type=tuple, info_type=(list, type(None)))  # (ino, dev) -> chunks or None
         self.stats = Statistics(output_json=log_json, iec=iec)  # threading: done by cache (including progress)
         self.cwd = os.getcwd()
         self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse)
@@ -1298,29 +1245,32 @@ class FilesystemObjectProcessors:
     def create_helper(self, path, st, status=None, hardlinkable=True):
         safe_path = make_path_safe(path)
         item = Item(path=safe_path)
-        hardlink_master = False
         hardlinked = hardlinkable and st.st_nlink > 1
+        update_map = False
         if hardlinked:
-            source = self.hard_links.get((st.st_ino, st.st_dev))
-            if source is not None:
-                item.source = source
-                status = 'h'  # hardlink (to already seen inodes)
-            else:
-                hardlink_master = True
-        yield item, status, hardlinked, hardlink_master
-        # if we get here, "with"-block worked ok without error/exception, the item was processed ok...
+            status = 'h'  # hardlink
+            nothing = object()
+            chunks = self.hlm.retrieve(id=(st.st_ino, st.st_dev), default=nothing)
+            if chunks is nothing:
+                update_map = True
+            elif chunks is not None:
+                item.chunks = chunks
+            item.hlid = self.hlm.hardlink_id_from_inode(ino=st.st_ino, dev=st.st_dev)
+        yield item, status, hardlinked
         self.add_item(item, stats=self.stats)
-        # ... and added to the archive, so we can remember it to refer to it later in the archive:
-        if hardlink_master:
-            self.hard_links[(st.st_ino, st.st_dev)] = safe_path
+        if update_map:
+            # remember this inode and, if the item has chunks, also remember them,
+            # so we do not have to re-chunk when we see another hardlink to it.
+            chunks = item.chunks if 'chunks' in item else None
+            self.hlm.remember(id=(st.st_ino, st.st_dev), info=chunks)
 
     def process_dir_with_fd(self, *, path, fd, st):
-        with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
+        with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked):
             item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
             return status
 
     def process_dir(self, *, path, parent_fd, name, st):
-        with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
+        with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked):
             with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir,
                         noatime=True, op='dir_open') as fd:
                 # fd is None for directories on windows, in that case a race condition check is not possible.
@@ -1331,7 +1281,7 @@ class FilesystemObjectProcessors:
                 return status
 
     def process_fifo(self, *, path, parent_fd, name, st):
-        with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master):  # fifo
+        with self.create_helper(path, st, 'f') as (item, status, hardlinked):  # fifo
             with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:
                 with backup_io('fstat'):
                     st = stat_update_check(st, os.fstat(fd))
@@ -1339,7 +1289,7 @@ class FilesystemObjectProcessors:
                 return status
 
     def process_dev(self, *, path, parent_fd, name, st, dev_type):
-        with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hardlink_master):  # char/block device
+        with self.create_helper(path, st, dev_type) as (item, status, hardlinked):  # char/block device
             # looks like we can not work fd-based here without causing issues when trying to open/close the device
             with backup_io('stat'):
                 st = stat_update_check(st, os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False))
@@ -1348,10 +1298,7 @@ class FilesystemObjectProcessors:
             return status
 
     def process_symlink(self, *, path, parent_fd, name, st):
-        # note: using hardlinkable=False because we can not support hardlinked symlinks,
-        #       due to the dual-use of item.source, see issue #2343:
-        # hardlinked symlinks will be archived [and extracted] as non-hardlinked symlinks.
-        with self.create_helper(path, st, 's', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
+        with self.create_helper(path, st, 's', hardlinkable=True) as (item, status, hardlinked):
             fname = name if name is not None and parent_fd is not None else path
             with backup_io('readlink'):
                 source = os.readlink(fname, dir_fd=parent_fd)
@@ -1384,7 +1331,7 @@ class FilesystemObjectProcessors:
         return status
 
     def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal):
-        with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master):  # no status yet
+        with self.create_helper(path, st, None) as (item, status, hardlinked):  # no status yet
             with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=True) as fd:
                 with backup_io('fstat'):
                     st = stat_update_check(st, os.fstat(fd))
@@ -1395,7 +1342,9 @@ class FilesystemObjectProcessors:
                     # so it can be extracted / accessed in FUSE mount like a regular file.
                     # this needs to be done early, so that part files also get the patched mode.
                     item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
-                if not hardlinked or hardlink_master:
+                if 'chunks' in item:  # create_helper might have put chunks from a previous hardlink there
+                    [cache.chunk_incref(id_, self.stats) for id_, _, _ in item.chunks]
+                else:  # normal case, no "2nd+" hardlink
                     if not is_special_file:
                         hashed_path = safe_encode(os.path.join(self.cwd, path))
                         path_hash = self.key.id_hash(hashed_path)
@@ -1420,7 +1369,6 @@ class FilesystemObjectProcessors:
                         status = 'M' if known else 'A'  # regular file, modified or added
                     self.print_file_status(status, path)
                     status = None  # we already printed the status
-                    item.hardlink_master = hardlinked
                     # Only chunkify the file if needed
                     if chunks is not None:
                         item.chunks = chunks
@@ -1444,7 +1392,7 @@ class FilesystemObjectProcessors:
                             # also, we must not memorize a potentially inconsistent/corrupt file that
                             # changed while we backed it up.
                             cache.memorize_file(hashed_path, path_hash, st, [c.id for c in item.chunks])
-                    self.stats.nfiles += 1
+                self.stats.nfiles += 1
                 item.update(self.metadata_collector.stat_ext_attrs(st, path, fd=fd))
                 item.get_size(memorize=True)
                 return status
@@ -1464,6 +1412,7 @@ class TarfileObjectProcessors:
 
         self.stats = Statistics(output_json=log_json, iec=iec)  # threading: done by cache (including progress)
         self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=False)
+        self.hlm = HardLinkManager(id_type=str, info_type=list)  # path -> chunks
 
     @contextmanager
     def create_helper(self, tarinfo, status=None, type=None):
@@ -1504,11 +1453,21 @@ class TarfileObjectProcessors:
             item.rdev = os.makedev(tarinfo.devmajor, tarinfo.devminor)
             return status
 
-    def process_link(self, *, tarinfo, status, type):
+    def process_symlink(self, *, tarinfo, status, type):
         with self.create_helper(tarinfo, status, type) as (item, status):
             item.source = tarinfo.linkname
             return status
 
+    def process_hardlink(self, *, tarinfo, status, type):
+        with self.create_helper(tarinfo, status, type) as (item, status):
+            # create a regular (not hardlinked) borg item, reusing the chunks; see HardLinkManager.__doc__
+            chunks = self.hlm.retrieve(tarinfo.linkname)
+            if chunks is not None:
+                item.chunks = chunks
+            item.get_size(memorize=True, from_chunks=True)
+            self.stats.nfiles += 1
+            return status
+
     def process_file(self, *, tarinfo, status, type, tar):
         with self.create_helper(tarinfo, status, type) as (item, status):
             self.print_file_status(status, tarinfo.name)
@@ -1516,8 +1475,10 @@ class TarfileObjectProcessors:
             fd = tar.extractfile(tarinfo)
             self.process_file_chunks(item, self.cache, self.stats, self.show_progress,
                                      backup_io_iter(self.chunker.chunkify(fd)))
-            item.get_size(memorize=True)
+            item.get_size(memorize=True, from_chunks=True)
             self.stats.nfiles += 1
+            # we need to remember ALL files, see HardLinkManager.__doc__
+            self.hlm.remember(id=tarinfo.name, info=item.chunks)
             return status
 
 
@@ -1787,7 +1748,7 @@ class ArchiveChecker:
                 continue
             if not valid_msgpacked_dict(data, archive_keys_serialized):
                 continue
-            if b'cmdline' not in data or b'\xa7version\x01' not in data:
+            if b'cmdline' not in data or b'\xa7version\x02' not in data:
                 continue
             try:
                 archive = msgpack.unpackb(data)
@@ -1944,9 +1905,6 @@ class ArchiveChecker:
             def valid_item(obj):
                 if not isinstance(obj, StableDict):
                     return False, 'not a dictionary'
-                # A bug in Attic up to and including release 0.13 added a (meaningless) b'acl' key to every item.
-                # We ignore it here, should it exist. See test_attic013_acl_bug for details.
-                obj.pop(b'acl', None)
                 keys = set(obj)
                 if not required_item_keys.issubset(keys):
                     return False, 'missing required keys: ' + list_keys_safe(required_item_keys - keys)
@@ -2031,7 +1989,7 @@ class ArchiveChecker:
                     del self.manifest.archives[info.name]
                     continue
                 archive = ArchiveItem(internal_dict=msgpack.unpackb(data))
-                if archive.version != 1:
+                if archive.version != 2:
                     raise Exception('Unknown archive metadata version')
                 archive.cmdline = [safe_decode(arg) for arg in archive.cmdline]
                 items_buffer = ChunkBuffer(self.key)
@@ -2130,34 +2088,11 @@ class ArchiveRecreater:
 
     def process_items(self, archive, target):
         matcher = self.matcher
-        target_is_subset = not matcher.empty()
-        hardlink_masters = {} if target_is_subset else None
-
-        def item_is_hardlink_master(item):
-            return (target_is_subset and
-                    hardlinkable(item.mode) and
-                    item.get('hardlink_master', True) and
-                    'source' not in item)
 
         for item in archive.iter_items():
             if not matcher.match(item.path):
                 self.print_file_status('x', item.path)
-                if item_is_hardlink_master(item):
-                    hardlink_masters[item.path] = (item.get('chunks'), item.get('chunks_healthy'), None)
                 continue
-            if target_is_subset and hardlinkable(item.mode) and item.get('source') in hardlink_masters:
-                # master of this hard link is outside the target subset
-                chunks, chunks_healthy, new_source = hardlink_masters[item.source]
-                if new_source is None:
-                    # First item to use this master, move the chunks
-                    item.chunks = chunks
-                    if chunks_healthy is not None:
-                        item.chunks_healthy = chunks_healthy
-                    hardlink_masters[item.source] = (None, None, item.path)
-                    del item.source
-                else:
-                    # Master was already moved, only update this item's source
-                    item.source = new_source
             if self.dry_run:
                 self.print_file_status('-', item.path)
             else:
@@ -2264,30 +2199,13 @@ class ArchiveRecreater:
         tag_files = []
         tagged_dirs = []
 
-        # to support reading hard-linked CACHEDIR.TAGs (aka CACHE_TAG_NAME), similar to hardlink_masters:
-        cachedir_masters = {}
-
-        if self.exclude_caches:
-            # sadly, due to how CACHEDIR.TAG works (filename AND file [header] contents) and
-            # how borg deals with hardlinks (slave hardlinks referring back to master hardlinks),
-            # we need to pass over the archive collecting hardlink master paths.
-            # as seen in issue #4911, the master paths can have an arbitrary filenames,
-            # not just CACHEDIR.TAG.
-            for item in archive.iter_items(filter=lambda item: os.path.basename(item.path) == CACHE_TAG_NAME):
-                if stat.S_ISREG(item.mode) and 'chunks' not in item and 'source' in item:
-                    # this is a hardlink slave, referring back to its hardlink master (via item.source)
-                    cachedir_masters[item.source] = None  # we know the key (path), but not the value (item) yet
-
         for item in archive.iter_items(
                 filter=lambda item: os.path.basename(item.path) == CACHE_TAG_NAME or matcher.match(item.path)):
-            if self.exclude_caches and item.path in cachedir_masters:
-                cachedir_masters[item.path] = item
             dir, tag_file = os.path.split(item.path)
             if tag_file in self.exclude_if_present:
                 exclude(dir, item)
             elif self.exclude_caches and tag_file == CACHE_TAG_NAME and stat.S_ISREG(item.mode):
-                content_item = item if 'chunks' in item else cachedir_masters[item.source]
-                file = open_item(archive, content_item)
+                file = open_item(archive, item)
                 if file.read(len(CACHE_TAG_CONTENTS)) == CACHE_TAG_CONTENTS:
                     exclude(dir, item)
         matcher.add(tag_files, IECommand.Include)

+ 194 - 58
src/borg/archiver.py

@@ -29,6 +29,7 @@ try:
     from contextlib import contextmanager
     from datetime import datetime, timedelta
     from io import TextIOWrapper
+    from struct import Struct
 
     from .logger import create_logger, setup_logging
 
@@ -44,7 +45,7 @@ try:
     from .archive import has_link
     from .cache import Cache, assert_secure, SecurityManager
     from .constants import *  # NOQA
-    from .compress import CompressionSpec
+    from .compress import CompressionSpec, ZLIB, ZLIB_legacy, ObfuscateSize
     from .crypto.key import key_creator, key_argument_names, tam_required_file, tam_required
     from .crypto.key import RepoKey, KeyfileKey, Blake2RepoKey, Blake2KeyfileKey, FlexiKey
     from .crypto.keymanager import KeyManager
@@ -59,7 +60,7 @@ try:
     from .helpers import timestamp
     from .helpers import get_cache_dir, os_stat
     from .helpers import Manifest, AI_HUMAN_SORT_KEYS
-    from .helpers import hardlinkable
+    from .helpers import HardLinkManager
     from .helpers import StableDict
     from .helpers import check_python, check_extension_modules
     from .helpers import dir_is_tagged, is_slow_msgpack, is_supported_msgpack, yes, sysinfo
@@ -338,6 +339,137 @@ class Archiver:
         ).serve()
         return EXIT_SUCCESS
 
+    @with_other_repository(manifest=True, key=True, compatibility=(Manifest.Operation.READ,))
+    @with_repository(exclusive=True, manifest=True, cache=True, compatibility=(Manifest.Operation.WRITE,))
+    def do_transfer(self, args, *,
+               repository, manifest, key, cache,
+               other_repository=None, other_manifest=None, other_key=None):
+        """archives transfer from other repository"""
+
+        ITEM_KEY_WHITELIST = {'path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hlid',
+                              'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'birthtime', 'size',
+                              'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended',
+                              'part'}
+
+        def upgrade_item(item):
+            """upgrade item as needed, get rid of legacy crap"""
+            if hlm.borg1_hardlink_master(item):
+                item._dict['hlid'] = hlid = hlm.hardlink_id_from_path(item._dict['path'])
+                hlm.remember(id=hlid, info=(item._dict.get('chunks'), item._dict.get('chunks_healthy')))
+            elif hlm.borg1_hardlink_slave(item):
+                item._dict['hlid'] = hlid = hlm.hardlink_id_from_path(item._dict['source'])
+                chunks, chunks_healthy = hlm.retrieve(id=hlid, default=(None, None))
+                if chunks is not None:
+                    item._dict['chunks'] = chunks
+                    for chunk_id, _, _ in chunks:
+                        cache.chunk_incref(chunk_id, archive.stats)
+                if chunks_healthy is not None:
+                    item._dict['chunks_healthy'] = chunks_healthy
+                item._dict.pop('source')  # not used for hardlinks any more, replaced by hlid
+            for attr in 'atime', 'ctime', 'mtime', 'birthtime':
+                if attr in item:
+                    ns = getattr(item, attr)  # decode (bigint or Timestamp) --> int ns
+                    setattr(item, attr, ns)  # encode int ns --> msgpack.Timestamp only, no bigint any more
+            # make sure we only have desired stuff in the new item. specifically, make sure to get rid of:
+            # - 'acl' remnants of bug in attic <= 0.13
+            # - 'hardlink_master' (superseded by hlid)
+            new_item_dict = {key: value for key, value in item.as_dict().items() if key in ITEM_KEY_WHITELIST}
+            new_item = Item(internal_dict=new_item_dict)
+            new_item.get_size(memorize=True)  # if not already present: compute+remember size for items with chunks
+            assert all(key in new_item for key in REQUIRED_ITEM_KEYS)
+            return new_item
+
+        def upgrade_compressed_chunk(chunk):
+            def upgrade_zlib_and_level(chunk):
+                if ZLIB_legacy.detect(chunk):
+                    ctype = ZLIB.ID
+                    chunk = ctype + level + chunk  # get rid of the attic legacy: prepend separate type/level bytes
+                else:
+                    ctype = chunk[0:1]
+                    chunk = ctype + level + chunk[2:]  # keep type same, but set level
+                return chunk
+
+            ctype = chunk[0:1]
+            level = b'\xFF'  # FF means unknown compression level
+
+            if ctype == ObfuscateSize.ID:
+                # in older borg, we used unusual byte order
+                old_header_fmt = Struct('>I')
+                new_header_fmt = ObfuscateSize.header_fmt
+                length = ObfuscateSize.header_len
+                size_bytes = chunk[2:2+length]
+                size = old_header_fmt.unpack(size_bytes)[0]  # unpack() returns a tuple
+                size_bytes = new_header_fmt.pack(size)
+                compressed = chunk[2+length:]
+                compressed = upgrade_zlib_and_level(compressed)
+                chunk = ctype + level + size_bytes + compressed
+            else:
+                chunk = upgrade_zlib_and_level(chunk)
+            return chunk
+
+        dry_run = args.dry_run
+
+        args.consider_checkpoints = True
+        archive_names = tuple(x.name for x in other_manifest.archives.list_considering(args))
+        if not archive_names:
+            return EXIT_SUCCESS
+
+        for name in archive_names:
+            transfer_size = 0
+            present_size = 0
+            if name in manifest.archives and not dry_run:
+                print(f"{name}: archive is already present in destination repo, skipping.")
+            else:
+                if not dry_run:
+                    print(f"{name}: copying archive to destination repo...")
+                hlm = HardLinkManager(id_type=bytes, info_type=tuple)  # hlid -> (chunks, chunks_healthy)
+                other_archive = Archive(other_repository, other_key, other_manifest, name)
+                archive = Archive(repository, key, manifest, name, cache=cache, create=True) if not dry_run else None
+                for item in other_archive.iter_items():
+                    if 'chunks' in item:
+                        chunks = []
+                        for chunk_id, size, _ in item.chunks:
+                            refcount = cache.seen_chunk(chunk_id, size)
+                            if refcount == 0:  # target repo does not yet have this chunk
+                                if not dry_run:
+                                    cdata = other_repository.get(chunk_id)
+                                    # keep compressed payload same, avoid decompression / recompression
+                                    data = other_key.decrypt(chunk_id, cdata, decompress=False)
+                                    data = upgrade_compressed_chunk(data)
+                                    chunk_entry = cache.add_chunk(chunk_id, data, archive.stats, wait=False,
+                                                                  compress=False, size=size)
+                                    cache.repository.async_response(wait=False)
+                                    chunks.append(chunk_entry)
+                                transfer_size += size
+                            else:
+                                if not dry_run:
+                                    chunk_entry = cache.chunk_incref(chunk_id, archive.stats)
+                                    chunks.append(chunk_entry)
+                                present_size += size
+                        if not dry_run:
+                            item.chunks = chunks  # overwrite! IDs and sizes are same, csizes are likely different
+                            archive.stats.nfiles += 1
+                    if not dry_run:
+                        archive.add_item(upgrade_item(item))
+                if not dry_run:
+                    additional_metadata = {}
+                    # keep all metadata except archive version and stats. also do not keep
+                    # recreate_source_id, recreate_args, recreate_partial_chunks which were used only in 1.1.0b1 .. b2.
+                    for attr in ('cmdline', 'hostname', 'username', 'time', 'time_end', 'comment',
+                                 'chunker_params', 'recreate_cmdline'):
+                        if hasattr(other_archive.metadata, attr):
+                            additional_metadata[attr] = getattr(other_archive.metadata, attr)
+                    archive.save(stats=archive.stats, additional_metadata=additional_metadata)
+                    print(f"{name}: finished. "
+                          f"transfer_size: {format_file_size(transfer_size)} "
+                          f"present_size: {format_file_size(present_size)}")
+                else:
+                    print(f"{name}: completed" if transfer_size == 0 else
+                          f"{name}: incomplete, "
+                          f"transfer_size: {format_file_size(transfer_size)} "
+                          f"present_size: {format_file_size(present_size)}")
+        return EXIT_SUCCESS
+
     @with_repository(create=True, exclusive=True, manifest=False)
     @with_other_repository(key=True, compatibility=(Manifest.Operation.READ, ))
     def do_init(self, args, repository, *, other_repository=None, other_key=None):
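
The byte-order conversion in upgrade_compressed_chunk above boils down to repacking one unsigned 32-bit size field. A small stand-alone illustration (the little-endian '<I' layout assumed here for the new ObfuscateSize.header_fmt is a guess for this example only; note that Struct.unpack always returns a tuple):

    from struct import Struct

    old_header_fmt = Struct('>I')   # big-endian, as older borg wrote the obfuscated size
    new_header_fmt = Struct('<I')   # assumed layout of ObfuscateSize.header_fmt (illustration only)

    size_bytes = old_header_fmt.pack(123456)        # b'\x00\x01\xe2@'
    size = old_header_fmt.unpack(size_bytes)[0]     # unpack() returns a tuple, take element 0
    assert new_header_fmt.pack(size) == b'@\xe2\x01\x00'
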
@@ -1055,16 +1187,14 @@ class Archiver:
             self.print_file_status(status, path)
 
     @staticmethod
-    def build_filter(matcher, peek_and_store_hardlink_masters, strip_components):
+    def build_filter(matcher, strip_components):
         if strip_components:
             def item_filter(item):
                 matched = matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:])
-                peek_and_store_hardlink_masters(item, matched)
                 return matched
         else:
             def item_filter(item):
                 matched = matcher.match(item.path)
-                peek_and_store_hardlink_masters(item, matched)
                 return matched
         return item_filter
 
@@ -1087,33 +1217,18 @@ class Archiver:
         sparse = args.sparse
         strip_components = args.strip_components
         dirs = []
-        partial_extract = not matcher.empty() or strip_components
-        hardlink_masters = {} if partial_extract or not has_link else None
+        hlm = HardLinkManager(id_type=bytes, info_type=str)  # hlid -> path
 
-        def peek_and_store_hardlink_masters(item, matched):
-            # not has_link:
-            # OS does not have hardlink capability thus we need to remember the chunks so that
-            # we can extract all hardlinks as separate normal (not-hardlinked) files instead.
-            #
-            # partial_extract and not matched and hardlinkable:
-            # we do not extract the very first hardlink, so we need to remember the chunks
-            # in hardlinks_master, so we can use them when we extract some 2nd+ hardlink item
-            # that has no chunks list.
-            if ((not has_link or (partial_extract and not matched and hardlinkable(item.mode))) and
-                    (item.get('hardlink_master', True) and 'source' not in item)):
-                hardlink_masters[item.get('path')] = (item.get('chunks'), None)
-
-        filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
+        filter = self.build_filter(matcher, strip_components)
         if progress:
             pi = ProgressIndicatorPercent(msg='%5.1f%% Extracting: %s', step=0.1, msgid='extract')
             pi.output('Calculating total archive size for the progress indicator (might take long for large archives)')
-            extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter))
+            extracted_size = sum(item.get_size() for item in archive.iter_items(filter))
             pi.total = extracted_size
         else:
             pi = None
 
-        for item in archive.iter_items(filter, partial_extract=partial_extract,
-                                       preload=True, hardlink_masters=hardlink_masters):
+        for item in archive.iter_items(filter, preload=True):
             orig_path = item.path
             if strip_components:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
@@ -1128,13 +1243,13 @@ class Archiver:
                 logging.getLogger('borg.output.list').info(remove_surrogates(item.path))
             try:
                 if dry_run:
-                    archive.extract_item(item, dry_run=True, pi=pi)
+                    archive.extract_item(item, dry_run=True, hlm=hlm, pi=pi)
                 else:
                     if stat.S_ISDIR(item.mode):
                         dirs.append(item)
                         archive.extract_item(item, stdout=stdout, restore_attrs=False)
                     else:
-                        archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
+                        archive.extract_item(item, stdout=stdout, sparse=sparse, hlm=hlm,
                                              stripped_components=strip_components, original_path=orig_path, pi=pi)
             except (BackupOSError, BackupError) as e:
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)
@@ -1199,15 +1314,9 @@ class Archiver:
         progress = args.progress
         output_list = args.output_list
         strip_components = args.strip_components
-        partial_extract = not matcher.empty() or strip_components
-        hardlink_masters = {} if partial_extract else None
-
-        def peek_and_store_hardlink_masters(item, matched):
-            if ((partial_extract and not matched and hardlinkable(item.mode)) and
-                    (item.get('hardlink_master', True) and 'source' not in item)):
-                hardlink_masters[item.get('path')] = (item.get('chunks'), None)
+        hlm = HardLinkManager(id_type=bytes, info_type=str)  # hlid -> path
 
-        filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
+        filter = self.build_filter(matcher, strip_components)
 
         # The | (pipe) symbol instructs tarfile to use a streaming mode of operation
         # where it never seeks on the passed fileobj.
@@ -1217,7 +1326,7 @@ class Archiver:
         if progress:
             pi = ProgressIndicatorPercent(msg='%5.1f%% Processing: %s', step=0.1, msgid='extract')
             pi.output('Calculating size')
-            extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter))
+            extracted_size = sum(item.get_size() for item in archive.iter_items(filter))
             pi.total = extracted_size
         else:
             pi = None
@@ -1252,9 +1361,8 @@ class Archiver:
             tarinfo.gid = item.gid
             tarinfo.uname = item.user or ''
             tarinfo.gname = item.group or ''
-            # The linkname in tar has the same dual use the 'source' attribute of Borg items,
-            # i.e. for symlinks it means the destination, while for hardlinks it refers to the
-            # file.
+            # The linkname in tar has 2 uses:
+            # for symlinks it means the destination, while for hardlinks it refers to the file.
             # Since hardlinks in tar have a different type code (LNKTYPE) the format might
             # support hardlinking arbitrary objects (including symlinks and directories), but
             # whether implementations actually support that is a whole different question...
@@ -1263,23 +1371,16 @@ class Archiver:
             modebits = stat.S_IFMT(item.mode)
             if modebits == stat.S_IFREG:
                 tarinfo.type = tarfile.REGTYPE
-                if 'source' in item:
-                    source = os.sep.join(item.source.split(os.sep)[strip_components:])
-                    if hardlink_masters is None:
-                        linkname = source
-                    else:
-                        chunks, linkname = hardlink_masters.get(item.source, (None, source))
-                    if linkname:
-                        # Master was already added to the archive, add a hardlink reference to it.
+                if 'hlid' in item:
+                    linkname = hlm.retrieve(id=item.hlid)
+                    if linkname is not None:
+                        # the first hardlink was already added to the archive, add a tar-hardlink reference to it.
                         tarinfo.type = tarfile.LNKTYPE
                         tarinfo.linkname = linkname
-                    elif chunks is not None:
-                        # The item which has the chunks was not put into the tar, therefore
-                        # we do that now and update hardlink_masters to reflect that.
-                        item.chunks = chunks
+                    else:
                         tarinfo.size = item.get_size()
                         stream = item_content_stream(item)
-                        hardlink_masters[item.get('source') or original_path] = (None, item.path)
+                        hlm.remember(id=item.hlid, info=item.path)
                 else:
                     tarinfo.size = item.get_size()
                     stream = item_content_stream(item)
@@ -1337,8 +1438,7 @@ class Archiver:
                 ph['BORG.item.meta'] = meta_text
             return ph
 
-        for item in archive.iter_items(filter, partial_extract=partial_extract,
-                                       preload=True, hardlink_masters=hardlink_masters):
+        for item in archive.iter_items(filter, preload=True):
             orig_path = item.path
             if strip_components:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
@@ -1973,12 +2073,11 @@ class Archiver:
             elif tarinfo.isdir():
                 status = tfo.process_dir(tarinfo=tarinfo, status='d', type=stat.S_IFDIR)
             elif tarinfo.issym():
-                status = tfo.process_link(tarinfo=tarinfo, status='s', type=stat.S_IFLNK)
+                status = tfo.process_symlink(tarinfo=tarinfo, status='s', type=stat.S_IFLNK)
             elif tarinfo.islnk():
-                # tar uses the same hardlink model as borg (rather vice versa); the first instance of a hardlink
-                # is stored as a regular file, later instances are special entries referencing back to the
-                # first instance.
-                status = tfo.process_link(tarinfo=tarinfo, status='h', type=stat.S_IFREG)
+                # tar uses a hardlink model where the first instance of a hardlink is stored as a regular file
+                # and later instances are special entries referencing back to the first instance.
+                status = tfo.process_hardlink(tarinfo=tarinfo, status='h', type=stat.S_IFREG)
             elif tarinfo.isblk():
                 status = tfo.process_dev(tarinfo=tarinfo, status='b', type=stat.S_IFBLK)
             elif tarinfo.ischr():
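
The tar hardlink model described in the comment above can be reproduced with the standard library alone; a minimal sketch (file names and content are made up):

    import io
    import tarfile

    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode='w') as tar:
        data = b'hello'
        first = tarfile.TarInfo('dir/a')        # first instance: a regular file with content
        first.size = len(data)
        tar.addfile(first, io.BytesIO(data))
        second = tarfile.TarInfo('dir/b')       # later instance: no content, just a reference
        second.type = tarfile.LNKTYPE
        second.linkname = 'dir/a'
        tar.addfile(second)

    buf.seek(0)
    with tarfile.open(fileobj=buf) as tar:
        members = tar.getmembers()
        assert members[0].isreg() and members[1].islnk()
        assert members[1].linkname == 'dir/a'
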
@@ -4083,6 +4182,43 @@ class Archiver:
                                help='archives to delete')
         define_archive_filters_group(subparser)
 
+        # borg transfer
+        transfer_epilog = process_epilog("""
+        This command transfers archives from one repository to another repository.
+
+        Suggested use:
+
+        # initialize DST_REPO reusing key material from SRC_REPO, so that
+        # chunking and chunk id generation will work in the same way as before.
+        borg init --other-location=SRC_REPO --encryption=DST_ENC DST_REPO
+
+        # transfer archives from SRC_REPO to DST_REPO
+        borg transfer --dry-run SRC_REPO DST_REPO  # check what it would do
+        borg transfer           SRC_REPO DST_REPO  # do it!
+        borg transfer --dry-run SRC_REPO DST_REPO  # check! anything left?
+
+        The default is to transfer all archives, including checkpoint archives.
+
+        You can use the archive filter options to limit which archives will be
+        transferred, e.g. the --prefix option. For big repositories containing multiple
+        data sets, this is recommended to keep the runtime per invocation lower.
+        """)
+        subparser = subparsers.add_parser('transfer', parents=[common_parser], add_help=False,
+                                          description=self.do_transfer.__doc__,
+                                          epilog=transfer_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter,
+                                          help='transfer of archives from another repository')
+        subparser.set_defaults(func=self.do_transfer)
+        subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true',
+                               help='do not change repository, just check')
+        subparser.add_argument('other_location', metavar='SRC_REPOSITORY',
+                               type=location_validator(archive=False, other=True),
+                               help='source repository')
+        subparser.add_argument('location', metavar='DST_REPOSITORY',
+                               type=location_validator(archive=False, other=False),
+                               help='destination repository')
+        define_archive_filters_group(subparser)
+
         # borg diff
         diff_epilog = process_epilog("""
             This command finds differences (file contents, user/group/mode) between archives.

+ 7 - 6
src/borg/cache.py

@@ -19,7 +19,7 @@ from .helpers import Location
 from .helpers import Error
 from .helpers import Manifest
 from .helpers import get_cache_dir, get_security_dir
-from .helpers import int_to_bigint, bigint_to_int, bin_to_hex, parse_stringified_list
+from .helpers import bin_to_hex, parse_stringified_list
 from .helpers import format_file_size
 from .helpers import safe_ns
 from .helpers import yes
@@ -28,6 +28,7 @@ from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
 from .helpers import set_ec, EXIT_WARNING
 from .helpers import safe_unlink
 from .helpers import msgpack
+from .helpers.msgpack import int_to_timestamp, timestamp_to_int
 from .item import ArchiveItem, ChunkListEntry
 from .crypto.key import PlaintextKey
 from .crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError
@@ -623,7 +624,7 @@ class LocalCache(CacheStatsMixin):
                     # this is to avoid issues with filesystem snapshots and cmtime granularity.
                     # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
                     entry = FileCacheEntry(*msgpack.unpackb(item))
-                    if entry.age == 0 and bigint_to_int(entry.cmtime) < self._newest_cmtime or \
+                    if entry.age == 0 and timestamp_to_int(entry.cmtime) < self._newest_cmtime or \
                        entry.age > 0 and entry.age < ttl:
                         msgpack.pack((path_hash, entry), fd)
                         entry_count += 1
@@ -756,7 +757,7 @@ class LocalCache(CacheStatsMixin):
             csize, data = decrypted_repository.get(archive_id)
             chunk_idx.add(archive_id, 1, len(data), csize)
             archive = ArchiveItem(internal_dict=msgpack.unpackb(data))
-            if archive.version != 1:
+            if archive.version not in (1, 2):  # legacy
                 raise Exception('Unknown archive metadata version')
             sync = CacheSynchronizer(chunk_idx)
             for item_id, (csize, data) in zip(archive.items, decrypted_repository.get_many(archive.items)):
@@ -1018,10 +1019,10 @@ class LocalCache(CacheStatsMixin):
         if 'i' in cache_mode and entry.inode != st.st_ino:
             files_cache_logger.debug('KNOWN-CHANGED: file inode number has changed: %r', hashed_path)
             return True, None
-        if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
+        if 'c' in cache_mode and timestamp_to_int(entry.cmtime) != st.st_ctime_ns:
             files_cache_logger.debug('KNOWN-CHANGED: file ctime has changed: %r', hashed_path)
             return True, None
-        elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
+        elif 'm' in cache_mode and timestamp_to_int(entry.cmtime) != st.st_mtime_ns:
             files_cache_logger.debug('KNOWN-CHANGED: file mtime has changed: %r', hashed_path)
             return True, None
         # we ignored the inode number in the comparison above or it is still same.
@@ -1049,7 +1050,7 @@ class LocalCache(CacheStatsMixin):
         elif 'm' in cache_mode:
             cmtime_type = 'mtime'
             cmtime_ns = safe_ns(st.st_mtime_ns)
-        entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_bigint(cmtime_ns), chunk_ids=ids)
+        entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_timestamp(cmtime_ns), chunk_ids=ids)
         self.files[path_hash] = msgpack.packb(entry)
         self._newest_cmtime = max(self._newest_cmtime or 0, cmtime_ns)
         files_cache_logger.debug('FILES-CACHE-UPDATE: put %r [has %s] <- %r',

+ 74 - 27
src/borg/compress.pyx

@@ -56,16 +56,21 @@ cdef class CompressorBase:
     also handles compression format auto detection and
     adding/stripping the ID header (which enable auto detection).
     """
-    ID = b'\xFF\xFF'  # reserved and not used
-                      # overwrite with a unique 2-bytes bytestring in child classes
+    ID = b'\xFF'  # reserved and not used
+                  # overwrite with a unique 1-byte bytestring in child classes
     name = 'baseclass'
 
     @classmethod
     def detect(cls, data):
         return data.startswith(cls.ID)
 
-    def __init__(self, **kwargs):
-        pass
+    def __init__(self, level=255, **kwargs):
+        assert 0 <= level <= 255
+        if self.ID is not None:
+            self.id_level = self.ID + bytes((level, ))  # level 255 means "unknown level"
+            assert len(self.id_level) == 2
+        else:
+            self.id_level = None
 
     def decide(self, data):
         """
@@ -85,8 +90,8 @@ cdef class CompressorBase:
         Compress *data* (bytes) and return bytes result. Prepend the ID bytes of this compressor,
         which is needed so that the correct decompressor can be used for decompression.
         """
-        # add ID bytes
-        return self.ID + data
+        # add id_level bytes
+        return self.id_level + data
 
     def decompress(self, data):
         """
@@ -96,7 +101,7 @@ cdef class CompressorBase:
         Only handles input generated by _this_ Compressor - for a general purpose
         decompression method see *Compressor.decompress*.
         """
-        # strip ID bytes
+        # strip id_level bytes
         return data[2:]
 
 cdef class DecidingCompressor(CompressorBase):
@@ -106,8 +111,8 @@ cdef class DecidingCompressor(CompressorBase):
     """
     name = 'decidebaseclass'
 
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+    def __init__(self, level=255, **kwargs):
+        super().__init__(level=level, **kwargs)
 
     def _decide(self, data):
         """
@@ -148,9 +153,12 @@ class CNONE(CompressorBase):
     """
     none - no compression, just pass through data
     """
-    ID = b'\x00\x00'
+    ID = b'\x00'
     name = 'none'
 
+    def __init__(self, level=255, **kwargs):
+        super().__init__(level=level, **kwargs)  # no defined levels for CNONE, so just say "unknown"
+
     def compress(self, data):
         return super().compress(data)
 
@@ -170,11 +178,11 @@ class LZ4(DecidingCompressor):
         - wrapper releases CPython's GIL to support multithreaded code
         - uses safe lz4 methods that never go beyond the end of the output buffer
     """
-    ID = b'\x01\x00'
+    ID = b'\x01'
     name = 'lz4'
 
-    def __init__(self, **kwargs):
-        pass
+    def __init__(self, level=255, **kwargs):
+        super().__init__(level=level, **kwargs)  # no defined levels for LZ4, so just say "unknown"
 
     def _decide(self, idata):
         """
@@ -235,11 +243,11 @@ class LZMA(DecidingCompressor):
     """
     lzma compression / decompression
     """
-    ID = b'\x02\x00'
+    ID = b'\x02'
     name = 'lzma'
 
     def __init__(self, level=6, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(level=level, **kwargs)
         self.level = level
         if lzma is None:
             raise ValueError('No lzma support found.')
@@ -270,11 +278,11 @@ class ZSTD(DecidingCompressor):
     # This is a NOT THREAD SAFE implementation.
     # Only ONE python context must be created at a time.
     # It should work flawlessly as long as borg will call ONLY ONE compression job at time.
-    ID = b'\x03\x00'
+    ID = b'\x03'
     name = 'zstd'
 
     def __init__(self, level=3, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(level=level, **kwargs)
         self.level = level
 
     def _decide(self, idata):
@@ -331,14 +339,52 @@ class ZSTD(DecidingCompressor):
         return dest[:osize]
 
 
-class ZLIB(CompressorBase):
+class ZLIB(DecidingCompressor):
     """
     zlib compression / decompression (python stdlib)
     """
-    ID = b'\x08\x00'  # not used here, see detect()
-                      # avoid all 0x.8.. IDs elsewhere!
+    ID = b'\x05'
     name = 'zlib'
 
+    def __init__(self, level=6, **kwargs):
+        super().__init__(level=level, **kwargs)
+        self.level = level
+
+    def _decide(self, data):
+        """
+        Decides what to do with *data*. Returns (compressor, zlib_data).
+
+        *zlib_data* is the ZLIB result if *compressor* is ZLIB as well, otherwise it is None.
+        """
+        zlib_data = zlib.compress(data, self.level)
+        if len(zlib_data) < len(data):
+            return self, zlib_data
+        else:
+            return NONE_COMPRESSOR, None
+
+    def decompress(self, data):
+        data = super().decompress(data)
+        try:
+            return zlib.decompress(data)
+        except zlib.error as e:
+            raise DecompressionError(str(e)) from None
+
+
+class ZLIB_legacy(CompressorBase):
+    """
+    zlib compression / decompression (python stdlib)
+
+    Note: This is the legacy ZLIB support as used by borg < 1.3.
+          It still suffers from attic *only* supporting zlib and not having separate
+          ID bytes to differentiate between differently compressed chunks.
+          This just works because zlib compressed stuff always starts with 0x.8.. bytes.
+          Newer borg uses the ZLIB class that has separate ID bytes (as all the other
+          compressors) and does not need this hack.
+    """
+    ID = b'\x08'  # not used here, see detect()
+    # avoid all 0x.8 IDs elsewhere!
+    name = 'zlib_legacy'
+
     @classmethod
     def detect(cls, data):
         # matches misc. patterns 0x.8.. used by zlib
@@ -348,7 +394,7 @@ class ZLIB(CompressorBase):
         return check_ok and is_deflate
 
     def __init__(self, level=6, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(level=level, **kwargs)
         self.level = level
 
     def compress(self, data):
@@ -440,14 +486,14 @@ class ObfuscateSize(CompressorBase):
     """
     Meta-Compressor that obfuscates the compressed data size.
     """
-    ID = b'\x04\x00'
+    ID = b'\x04'
     name = 'obfuscate'
 
-    header_fmt = Struct('>I')
+    header_fmt = Struct('<I')
     header_len = len(header_fmt.pack(0))
 
     def __init__(self, level=None, compressor=None):
-        super().__init__()
+        super().__init__(level=level)  # data will be encrypted, so we can tell the level
         self.compressor = compressor
         if level is None:
             pass  # decompression
@@ -502,13 +548,14 @@ COMPRESSOR_TABLE = {
     CNONE.name: CNONE,
     LZ4.name: LZ4,
     ZLIB.name: ZLIB,
+    ZLIB_legacy.name: ZLIB_legacy,
     LZMA.name: LZMA,
     Auto.name: Auto,
     ZSTD.name: ZSTD,
     ObfuscateSize.name: ObfuscateSize,
 }
 # List of possible compression types. Does not include Auto, since it is a meta-Compressor.
-COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ObfuscateSize, ]  # check fast stuff first
+COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, ZLIB_legacy, LZMA, ObfuscateSize, ]  # check fast stuff first
 
 def get_compressor(name, **kwargs):
     cls = COMPRESSOR_TABLE[name]
@@ -554,7 +601,7 @@ class CompressionSpec:
         self.name = values[0]
         if self.name in ('none', 'lz4', ):
             return
-        elif self.name in ('zlib', 'lzma', ):
+        elif self.name in ('zlib', 'lzma', 'zlib_legacy'):  # zlib_legacy just for testing
             if count < 2:
                 level = 6  # default compression level in py stdlib
             elif count == 2:
@@ -597,7 +644,7 @@ class CompressionSpec:
     def compressor(self):
         if self.name in ('none', 'lz4', ):
             return get_compressor(self.name)
-        elif self.name in ('zlib', 'lzma', 'zstd', ):
+        elif self.name in ('zlib', 'lzma', 'zstd', 'zlib_legacy'):
             return get_compressor(self.name, level=self.level)
         elif self.name == 'auto':
             return get_compressor(self.name, compressor=self.inner.compressor)

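For illustration, a minimal standalone sketch of the new 2-byte chunk header (one compressor ID byte plus one level byte, where 255 means "unknown level"). It only mimics the scheme with stdlib zlib and the ID value shown above; it is not borg code:

    import zlib

    ZLIB_ID = 0x05        # new 1-byte ZLIB compressor ID (see diff above)
    UNKNOWN_LEVEL = 0xff  # level byte 255 means "unknown level"

    def compress_with_header(data, level=6):
        # prepend ID byte + level byte, like CompressorBase.compress() now does
        return bytes((ZLIB_ID, level)) + zlib.compress(data, level)

    def decompress_with_header(blob):
        assert blob[:1] == bytes((ZLIB_ID,))  # auto-detection matches on the ID byte
        return zlib.decompress(blob[2:])      # strip the 2-byte id_level header

    payload = b'hello world ' * 100
    blob = compress_with_header(payload, level=6)
    assert blob[:2] == b'\x05\x06'
    assert decompress_with_header(blob) == payload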
+ 1 - 1
src/borg/constants.py

@@ -1,5 +1,5 @@
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master',
+ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master', 'hlid',
                        'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'birthtime', 'size',
                        'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended',
                        'part'])

+ 14 - 25
src/borg/fuse.py

@@ -35,7 +35,8 @@ from .crypto.low_level import blake2b_128
 from .archiver import Archiver
 from .archive import Archive, get_item_uid_gid
 from .hashindex import FuseVersionsIndex
-from .helpers import daemonize, daemonizing, hardlinkable, signal_handler, format_file_size, Error
+from .helpers import daemonize, daemonizing, signal_handler, format_file_size, Error
+from .helpers import HardLinkManager
 from .helpers import msgpack
 from .item import Item
 from .lrucache import LRUCache
@@ -339,15 +340,9 @@ class FuseBackend:
                           consider_part_files=self._args.consider_part_files)
         strip_components = self._args.strip_components
         matcher = Archiver.build_matcher(self._args.patterns, self._args.paths)
-        partial_extract = not matcher.empty() or strip_components
-        hardlink_masters = {} if partial_extract else None
+        hlm = HardLinkManager(id_type=bytes, info_type=str)  # hlid -> path
 
-        def peek_and_store_hardlink_masters(item, matched):
-            if (partial_extract and not matched and hardlinkable(item.mode) and
-                    item.get('hardlink_master', True) and 'source' not in item):
-                hardlink_masters[item.get('path')] = (item.get('chunks'), None)
-
-        filter = Archiver.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
+        filter = Archiver.build_filter(matcher, strip_components)
         for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter,
                                                               consider_part_files=self._args.consider_part_files):
             if strip_components:
@@ -369,15 +364,13 @@ class FuseBackend:
             parent = 1
             for segment in segments[:-1]:
                 parent = self._process_inner(segment, parent)
-            self._process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode,
-                               hardlink_masters, strip_components)
+            self._process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode, hlm)
         duration = time.perf_counter() - t0
         logger.debug('fuse: _process_archive completed in %.1f s for archive %s', duration, archive.name)
 
-    def _process_leaf(self, name, item, parent, prefix, is_dir, item_inode, hardlink_masters, stripped_components):
+    def _process_leaf(self, name, item, parent, prefix, is_dir, item_inode, hlm):
         path = item.path
         del item.path  # save some space
-        hardlink_masters = hardlink_masters or {}
 
         def file_version(item, path):
             if 'chunks' in item:
@@ -402,10 +395,9 @@ class FuseBackend:
             version_enc = os.fsencode('.%05d' % version)
             return name + version_enc + ext
 
-        if 'source' in item and hardlinkable(item.mode):
-            source = os.sep.join(item.source.split(os.sep)[stripped_components:])
-            chunks, link_target = hardlink_masters.get(item.source, (None, source))
-            if link_target:
+        if 'hlid' in item:
+            link_target = hlm.retrieve(id=item.hlid, default=None)
+            if link_target is not None:
                 # Hard link was extracted previously, just link
                 link_target = os.fsencode(link_target)
                 if self.versions:
@@ -415,19 +407,16 @@ class FuseBackend:
                 try:
                     inode = self.find_inode(link_target, prefix)
                 except KeyError:
-                    logger.warning('Skipping broken hard link: %s -> %s', path, source)
+                    logger.warning('Skipping broken hard link: %s -> %s', path, link_target)
                     return
                 item = self.get_item(inode)
                 item.nlink = item.get('nlink', 1) + 1
                 self._items[inode] = item
-            elif chunks is not None:
-                # assign chunks to this item, since the item which had the chunks was not extracted
-                item.chunks = chunks
+            else:
                 inode = item_inode
                 self._items[inode] = item
-                if hardlink_masters:
-                    # Update master entry with extracted item path, so that following hardlinks don't extract twice.
-                    hardlink_masters[item.source] = (None, path)
+                # remember extracted item path, so that following hardlinks don't extract twice.
+                hlm.remember(id=item.hlid, info=path)
         else:
             inode = item_inode
 
@@ -436,7 +425,7 @@ class FuseBackend:
             enc_path = os.fsencode(path)
             version = file_version(item, enc_path)
             if version is not None:
-                # regular file, with contents - maybe a hardlink master
+                # regular file, with contents
                 name = make_versioned_name(name, version)
                 self.file_versions[enc_path] = version
 

+ 71 - 3
src/borg/helpers/fs.py

@@ -1,4 +1,5 @@
 import errno
+import hashlib
 import os
 import os.path
 import re
@@ -165,9 +166,76 @@ def make_path_safe(path):
     return _safe_re.sub('', path) or '.'
 
 
-def hardlinkable(mode):
-    """return True if we support hardlinked items of this type"""
-    return stat.S_ISREG(mode) or stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode)
+class HardLinkManager:
+    """
+    Manage hardlinks (and avoid code duplication doing so).
+
+    A) When creating a borg2 archive from the filesystem, we have to maintain a mapping like:
+       (dev, ino) -> (hlid, chunks)  # for fs_hl_targets
+       If we encounter the same (dev, ino) again later, we'll just re-use the hlid and chunks list.
+
+    B) When extracting a borg2 archive to the filesystem, we have to maintain a mapping like:
+       hlid -> path
+       If we encounter the same hlid again later, we hardlink to the path of the already extracted content with that hlid.
+
+    C) When transferring from a borg1 archive, we need:
+       path -> chunks, chunks_healthy  # for borg1_hl_targets
+       If we encounter a regular file item with source == path later, we reuse chunks and chunks_healthy
+       and create the same hlid = hardlink_id_from_path(source).
+
+    D) When importing a tar file (simplified 1-pass way for now, not creating borg hardlink items):
+       path -> chunks
+       If we encounter a LNK tar entry later with linkname==path, we re-use the chunks and create a regular file item.
+       For better hardlink support (including the very first hardlink item for each group of same-target hardlinks),
+       we would need a 2-pass processing, which is not yet implemented.
+    """
+    def __init__(self, *, id_type, info_type):
+        self._map = {}
+        self.id_type = id_type
+        self.info_type = info_type
+
+    def borg1_hardlinkable(self, mode):  # legacy
+        return stat.S_ISREG(mode) or stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode)
+
+    def borg1_hardlink_master(self, item):  # legacy
+        return item.get('hardlink_master', True) and 'source' not in item and self.borg1_hardlinkable(item.mode)
+
+    def borg1_hardlink_slave(self, item):  # legacy
+        return 'source' in item and self.borg1_hardlinkable(item.mode)
+
+    def hardlink_id_from_path(self, path):
+        """compute a hardlink id from a path"""
+        assert isinstance(path, bytes)
+        return hashlib.sha256(path).digest()
+
+    def hardlink_id_from_inode(self, *, ino, dev):
+        """compute a hardlink id from an inode"""
+        assert isinstance(ino, int)
+        assert isinstance(dev, int)
+        return hashlib.sha256(f'{ino}/{dev}'.encode()).digest()
+
+    def remember(self, *, id, info):
+        """
+        remember stuff from a (usually contentful) item.
+
+        :param id: some id used to reference the contentful item, could be:
+                   a path (tar style, old borg style) [bytes]
+                   a hlid (new borg style) [bytes]
+                   a (dev, inode) tuple (filesystem)
+        :param info: information to remember, could be:
+                     chunks / chunks_healthy list
+                     hlid
+        """
+        assert isinstance(id, self.id_type), f"id is {id!r}, not of type {self.id_type}"
+        assert isinstance(info, self.info_type), f"info is {info!r}, not of type {self.info_type}"
+        self._map[id] = info
+
+    def retrieve(self, id, *, default=None):
+        """
+        retrieve stuff to use it in a (usually contentless) item.
+        """
+        assert isinstance(id, self.id_type)
+        return self._map.get(id, default)
 
 
 def scandir_keyfunc(dirent):

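To show how the class above replaces the old hardlink_master/source bookkeeping at extraction time (case B), here is a small usage sketch; HardLinkManager is imported as in the fuse.py change, while extract_regular_file and make_hardlink are hypothetical stand-ins for the real extraction code:

    from borg.helpers import HardLinkManager

    hlm = HardLinkManager(id_type=bytes, info_type=str)  # maps hlid -> already extracted path

    def extract_item(item, extract_regular_file, make_hardlink):
        # hypothetical driver: link later occurrences of an hlid to the first extracted path
        if 'hlid' in item:
            link_target = hlm.retrieve(id=item.hlid, default=None)
            if link_target is not None:
                make_hardlink(link_target, item.path)   # seen this hlid before: just hardlink
                return
            hlm.remember(id=item.hlid, info=item.path)  # first occurrence: extract it, remember path
        extract_regular_file(item)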
+ 14 - 3
src/borg/helpers/msgpack.py

@@ -24,7 +24,7 @@ from msgpack import unpackb as mp_unpackb
 from msgpack import unpack as mp_unpack
 from msgpack import version as mp_version
 
-from msgpack import ExtType
+from msgpack import ExtType, Timestamp
 from msgpack import OutOfData
 
 
@@ -164,7 +164,7 @@ def get_limited_unpacker(kind):
     return Unpacker(**args)
 
 
-def bigint_to_int(mtime):
+def bigint_to_int(mtime):  # legacy
     """Convert bytearray to int
     """
     if isinstance(mtime, bytes):
@@ -172,7 +172,7 @@ def bigint_to_int(mtime):
     return mtime
 
 
-def int_to_bigint(value):
+def int_to_bigint(value):  # legacy
     """Convert integers larger than 64 bits to bytearray
 
     Smaller integers are left alone
@@ -180,3 +180,14 @@ def int_to_bigint(value):
     if value.bit_length() > 63:
         return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True)
     return value
+
+
+def int_to_timestamp(ns):
+    return Timestamp.from_unix_nano(ns)
+
+
+def timestamp_to_int(ts):
+    if isinstance(ts, Timestamp):
+        return ts.to_unix_nano()
+    # legacy support note: we need to keep the bigint conversion for compatibility with borg < 1.3 archives.
+    return bigint_to_int(ts)

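A short round-trip sketch of the new helpers (same import path as used in the cache.py change above); note that timestamp_to_int() still decodes the legacy bigint byte encoding, so values from borg < 1.3 archives keep working:

    from borg.helpers.msgpack import int_to_timestamp, timestamp_to_int

    ns = 1_650_000_000_123_456_789       # some timestamp in nanoseconds
    ts = int_to_timestamp(ns)            # -> msgpack Timestamp (native msgpack ext type)
    assert timestamp_to_int(ts) == ns    # exact nanosecond round-trip

    legacy = (2 ** 65).to_bytes(9, 'little', signed=True)  # what int_to_bigint() produced
    assert timestamp_to_int(legacy) == 2 ** 65              # legacy values still decode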
+ 10 - 7
src/borg/helpers/parseformat.py

@@ -19,6 +19,7 @@ logger = create_logger()
 
 from .errors import Error
 from .fs import get_keys_dir
+from .msgpack import Timestamp
 from .time import OutputTimestamp, format_time, to_localtime, safe_timestamp, safe_s
 from .. import __version__ as borg_version
 from .. import __version_tuple__ as borg_version_tuple
@@ -694,7 +695,8 @@ class ItemFormatter(BaseFormatter):
     KEY_DESCRIPTIONS = {
         'bpath': 'verbatim POSIX path, can contain any character except NUL',
         'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
-        'source': 'link target for links (identical to linktarget)',
+        'source': 'link target for symlinks (identical to linktarget)',
+        'hlid': 'hard link identity (same if hardlinking same fs object)',
         'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
         'csize': 'compressed size',
         'dsize': 'deduplicated size',
@@ -705,7 +707,7 @@ class ItemFormatter(BaseFormatter):
         'health': 'either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks)',
     }
     KEY_GROUPS = (
-        ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget', 'flags'),
+        ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget', 'hlid', 'flags'),
         ('size', 'csize', 'dsize', 'dcsize', 'num_chunks', 'unique_chunks'),
         ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
         tuple(sorted(hash_algorithms)),
@@ -801,11 +803,9 @@ class ItemFormatter(BaseFormatter):
         extra = ''
         if source:
             source = remove_surrogates(source)
-            if item_type == 'l':
-                extra = ' -> %s' % source
-            else:
-                mode = 'h' + mode[1:]
-                extra = ' link to %s' % source
+            extra = ' -> %s' % source
+        hlid = item.get('hlid')
+        hlid = bin_to_hex(hlid) if hlid else ''
         item_data['type'] = item_type
         item_data['mode'] = mode
         item_data['user'] = item.user or item.uid
@@ -821,6 +821,7 @@ class ItemFormatter(BaseFormatter):
             item_data['health'] = 'broken' if 'chunks_healthy' in item else 'healthy'
         item_data['source'] = source
         item_data['linktarget'] = source
+        item_data['hlid'] = hlid
         item_data['flags'] = item.get('bsdflags')
         for key in self.used_call_keys:
             item_data[key] = self.call_keys[key](item)
@@ -1043,6 +1044,8 @@ def prepare_dump_dict(d):
                 value = decode_tuple(value)
             elif isinstance(value, bytes):
                 value = decode_bytes(value)
+            elif isinstance(value, Timestamp):
+                value = value.to_unix_nano()
             if isinstance(key, bytes):
                 key = key.decode()
             res[key] = value

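With the new key in place, a listing that shows hard link identities could look like the following (illustrative only; repository and archive names are placeholders, and {hlid} renders as hex, or empty for non-hardlinked items):

    borg list --format='{mode} {path} {hlid}{NL}' REPO::ARCHIVE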
+ 7 - 8
src/borg/item.pyx

@@ -3,9 +3,9 @@ from collections import namedtuple
 
 from .constants import ITEM_KEYS, ARCHIVE_KEYS
 from .helpers import safe_encode, safe_decode
-from .helpers import bigint_to_int, int_to_bigint
 from .helpers import StableDict
 from .helpers import format_file_size
+from .helpers.msgpack import timestamp_to_int, int_to_timestamp
 
 
 cdef extern from "_item.c":
@@ -171,17 +171,17 @@ class Item(PropDict):
     rdev = PropDict._make_property('rdev', int)
     bsdflags = PropDict._make_property('bsdflags', int)
 
-    # note: we need to keep the bigint conversion for compatibility with borg 1.0 archives.
-    atime = PropDict._make_property('atime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
-    ctime = PropDict._make_property('ctime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
-    mtime = PropDict._make_property('mtime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
-    birthtime = PropDict._make_property('birthtime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
+    atime = PropDict._make_property('atime', int, 'int (ns)', encode=int_to_timestamp, decode=timestamp_to_int)
+    ctime = PropDict._make_property('ctime', int, 'int (ns)', encode=int_to_timestamp, decode=timestamp_to_int)
+    mtime = PropDict._make_property('mtime', int, 'int (ns)', encode=int_to_timestamp, decode=timestamp_to_int)
+    birthtime = PropDict._make_property('birthtime', int, 'int (ns)', encode=int_to_timestamp, decode=timestamp_to_int)
 
     # size is only present for items with a chunk list and then it is sum(chunk_sizes)
     # compatibility note: this is a new feature, in old archives size will be missing.
     size = PropDict._make_property('size', int)
 
-    hardlink_master = PropDict._make_property('hardlink_master', bool)
+    hlid = PropDict._make_property('hlid', bytes)  # hard link id: same value means same hard link.
+    hardlink_master = PropDict._make_property('hardlink_master', bool)  # legacy
 
     chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
     chunks_healthy = PropDict._make_property('chunks_healthy', (list, type(None)), 'list or None')
@@ -214,7 +214,6 @@ class Item(PropDict):
         except AttributeError:
             if stat.S_ISLNK(self.mode):
                 # get out of here quickly. symlinks have no own chunks, their fs size is the length of the target name.
-                # also, there is the dual-use issue of .source (#2343), so don't confuse it with a hardlink slave.
                 return len(self.source)
             # no precomputed (c)size value available, compute it:
             try:

+ 35 - 44
src/borg/testsuite/archiver.py

@@ -321,7 +321,7 @@ class ArchiverTestCaseBase(BaseTestCase):
                 contents = b'X' * size
             fd.write(contents)
 
-    def create_test_files(self):
+    def create_test_files(self, create_hardlinks=True):
         """Create a minimal test case including all supported file types
         """
         # File
@@ -332,7 +332,7 @@ class ArchiverTestCaseBase(BaseTestCase):
         # File mode
         os.chmod('input/file1', 0o4755)
         # Hard link
-        if are_hardlinks_supported():
+        if are_hardlinks_supported() and create_hardlinks:
             os.link(os.path.join(self.input_path, 'file1'),
                     os.path.join(self.input_path, 'hardlink'))
         # Symlink
@@ -432,7 +432,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             self.assert_in(name, list_output)
         self.assert_dirs_equal('input', 'output/input')
         info_output = self.cmd('info', self.repository_location + '::test')
-        item_count = 4 if has_lchflags else 5  # one file is UF_NODUMP
+        item_count = 5 if has_lchflags else 6  # one file is UF_NODUMP
         self.assert_in('Number of files: %d' % item_count, info_output)
         shutil.rmtree(self.cache_path)
         info_output2 = self.cmd('info', self.repository_location + '::test')
@@ -506,6 +506,29 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             self.cmd('extract', self.repository_location + '::test')
             assert os.readlink('input/link1') == 'somewhere'
 
+    @pytest.mark.skipif(not are_symlinks_supported() or not are_hardlinks_supported(),
+                        reason='symlinks or hardlinks not supported')
+    def test_hardlinked_symlinks_extract(self):
+        self.create_regular_file('target', size=1024)
+        with changedir('input'):
+            os.symlink('target', 'symlink1')
+            os.link('symlink1', 'symlink2', follow_symlinks=False)
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        with changedir('output'):
+            output = self.cmd('extract', self.repository_location + '::test')
+            print(output)
+            with changedir('input'):
+                assert os.path.exists('target')
+                assert os.readlink('symlink1') == 'target'
+                assert os.readlink('symlink2') == 'target'
+                st1 = os.stat('symlink1', follow_symlinks=False)
+                st2 = os.stat('symlink2', follow_symlinks=False)
+                assert st1.st_nlink == 2
+                assert st2.st_nlink == 2
+                assert st1.st_ino == st2.st_ino
+                assert st1.st_size == st2.st_size
+
     @pytest.mark.skipif(not is_utime_fully_supported(), reason='cannot properly setup and execute test without utime')
     def test_atime(self):
         def has_noatime(some_file):
@@ -2442,7 +2465,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
     def test_compression_zlib_compressible(self):
         size, csize = self._get_sizes('zlib', compressible=True)
         assert csize < size * 0.1
-        assert csize == 35
+        assert csize == 37
 
     def test_compression_zlib_uncompressible(self):
         size, csize = self._get_sizes('zlib', compressible=False)
@@ -2451,7 +2474,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
     def test_compression_auto_compressible(self):
         size, csize = self._get_sizes('auto,zlib', compressible=True)
         assert csize < size * 0.1
-        assert csize == 35  # same as compression 'zlib'
+        assert csize == 37  # same as compression 'zlib'
 
     def test_compression_auto_uncompressible(self):
         size, csize = self._get_sizes('auto,zlib', compressible=False)
@@ -2661,7 +2684,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
                 hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
                 assert os.stat(hl1).st_ino == os.stat(hl2).st_ino == os.stat(hl3).st_ino
                 assert open(hl3, 'rb').read() == b'123456'
-        # similar again, but exclude the hardlink master:
+        # similar again, but exclude the 1st hardlink:
         with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions', '-e', 'input/hardlink1'):
             if are_hardlinks_supported():
                 hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
@@ -3475,7 +3498,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
             assert os.stat('input/dir1/source2').st_nlink == 2
 
     def test_import_tar(self, tar_format='PAX'):
-        self.create_test_files()
+        self.create_test_files(create_hardlinks=False)  # hardlinks become separate files
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=none', self.repository_location)
         self.cmd('create', self.repository_location + '::src', 'input')
@@ -3489,7 +3512,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
     def test_import_tar_gz(self, tar_format='GNU'):
         if not shutil.which('gzip'):
             pytest.skip('gzip is not installed')
-        self.create_test_files()
+        self.create_test_files(create_hardlinks=False)  # hardlinks become separate files
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=none', self.repository_location)
         self.cmd('create', self.repository_location + '::src', 'input')
@@ -3850,7 +3873,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
                 'username': 'bar',
                 'name': 'archive1',
                 'time': '2016-12-15T18:49:51.849711',
-                'version': 1,
+                'version': 2,
             })
             archive_id = key.id_hash(archive)
             repository.put(archive_id, key.encrypt(archive_id, archive))
@@ -3907,35 +3930,6 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
             repository.commit(compact=False)
         self.cmd('check', self.repository_location, exit_code=1)
 
-    def test_attic013_acl_bug(self):
-        # Attic up to release 0.13 contained a bug where every item unintentionally received
-        # a b'acl'=None key-value pair.
-        # This bug can still live on in Borg repositories (through borg upgrade).
-        class Attic013Item:
-            def as_dict(self):
-                return {
-                    # These are required
-                    b'path': '1234',
-                    b'mtime': 0,
-                    b'mode': 0,
-                    b'user': b'0',
-                    b'group': b'0',
-                    b'uid': 0,
-                    b'gid': 0,
-                    # acl is the offending key.
-                    b'acl': None,
-                }
-
-        archive, repository = self.open_archive('archive1')
-        with repository:
-            manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-            with Cache(repository, key, manifest) as cache:
-                archive = Archive(repository, key, manifest, '0.13', cache=cache, create=True)
-                archive.items_buffer.add(Attic013Item())
-                archive.save()
-        self.cmd('check', self.repository_location, exit_code=0)
-        self.cmd('list', self.repository_location + '::0.13', exit_code=0)
-
 
 class ManifestAuthenticationTest(ArchiverTestCaseBase):
     def spoof_manifest(self, repository):
@@ -4473,26 +4467,23 @@ def test_chunk_content_equal():
 
 
 class TestBuildFilter:
-    @staticmethod
-    def peek_and_store_hardlink_masters(item, matched):
-        pass
 
     def test_basic(self):
         matcher = PatternMatcher()
         matcher.add([parse_pattern('included')], IECommand.Include)
-        filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0)
+        filter = Archiver.build_filter(matcher, 0)
         assert filter(Item(path='included'))
         assert filter(Item(path='included/file'))
         assert not filter(Item(path='something else'))
 
     def test_empty(self):
         matcher = PatternMatcher(fallback=True)
-        filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0)
+        filter = Archiver.build_filter(matcher, 0)
         assert filter(Item(path='anything'))
 
     def test_strip_components(self):
         matcher = PatternMatcher(fallback=True)
-        filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, strip_components=1)
+        filter = Archiver.build_filter(matcher, strip_components=1)
         assert not filter(Item(path='shallow'))
         assert not filter(Item(path='shallow/'))  # can this even happen? paths are normalized...
         assert filter(Item(path='deep enough/file'))

+ 2 - 2
src/borg/testsuite/compress.py

@@ -88,11 +88,11 @@ def test_autodetect_invalid():
         Compressor(**params).decompress(b'\x08\x00notreallyzlib')
 
 
-def test_zlib_compat():
+def test_zlib_legacy_compat():
     # for compatibility reasons, we do not add an extra header for zlib,
     # nor do we expect one when decompressing / autodetecting
     for level in range(10):
-        c = get_compressor(name='zlib', level=level)
+        c = get_compressor(name='zlib_legacy', level=level)
         cdata1 = c.compress(data)
         cdata2 = zlib.compress(data, level)
         assert cdata1 == cdata2

+ 4 - 3
src/borg/testsuite/item.py

@@ -3,6 +3,7 @@ import pytest
 from ..cache import ChunkListEntry
 from ..item import Item
 from ..helpers import StableDict
+from ..helpers.msgpack import Timestamp
 
 
 def test_item_empty():
@@ -77,15 +78,15 @@ def test_item_int_property():
         item.mode = "invalid"
 
 
-def test_item_bigint_property():
+def test_item_mptimestamp_property():
     item = Item()
     small, big = 42, 2 ** 65
     item.atime = small
     assert item.atime == small
-    assert item.as_dict() == {'atime': small}
+    assert item.as_dict() == {'atime': Timestamp.from_unix_nano(small)}
     item.atime = big
     assert item.atime == big
-    assert item.as_dict() == {'atime': b'\0' * 8 + b'\x02'}
+    assert item.as_dict() == {'atime': Timestamp.from_unix_nano(big)}
 
 
 def test_item_user_group_none():

+ 4 - 4
src/borg/testsuite/key.py

@@ -256,8 +256,8 @@ class TestKey:
         plaintext = b'123456789'
         id = key.id_hash(plaintext)
         authenticated = key.encrypt(id, plaintext)
-        # 0x07 is the key TYPE, \x0000 identifies no compression.
-        assert authenticated == b'\x07\x00\x00' + plaintext
+        # 0x07 is the key TYPE, \x00\xff identifies no compression / unknown level.
+        assert authenticated == b'\x07\x00\xff' + plaintext
 
     def test_blake2_authenticated_encrypt(self, monkeypatch):
         monkeypatch.setenv('BORG_PASSPHRASE', 'test')
@@ -267,8 +267,8 @@ class TestKey:
         plaintext = b'123456789'
         id = key.id_hash(plaintext)
         authenticated = key.encrypt(id, plaintext)
-        # 0x06 is the key TYPE, 0x0000 identifies no compression.
-        assert authenticated == b'\x06\x00\x00' + plaintext
+        # 0x06 is the key TYPE, 0x00ff identifies no compression / unknown level.
+        assert authenticated == b'\x06\x00\xff' + plaintext
 
 
 class TestTAM:
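For clarity, an illustrative decomposition of the header bytes asserted in the key tests above (not borg code): one key TYPE byte, then the 1-byte compressor ID, then the 1-byte level:

    blob = b'\x07\x00\xff' + b'123456789'  # as asserted above for the authenticated key
    key_type, comp_id, comp_level = blob[0], blob[1], blob[2]
    assert key_type == 0x07     # key TYPE byte
    assert comp_id == 0x00      # CNONE ("no compression") ID byte
    assert comp_level == 0xff   # 255 = "unknown level"
    assert blob[3:] == b'123456789'  # plaintext passes through uncompressed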