Browse Source

compute hlid from inode / device

Thomas Waldmann 3 years ago
parent
commit
32a3601e4a
3 changed files with 19 additions and 13 deletions
  1. 7 7
      src/borg/archive.py
  2. 2 2
      src/borg/archiver.py
  3. 10 4
      src/borg/helpers/fs.py

+ 7 - 7
src/borg/archive.py

@@ -1236,7 +1236,7 @@ class FilesystemObjectProcessors:
         self.show_progress = show_progress
         self.print_file_status = file_status_printer or (lambda *args: None)
 
-        self.hlm = HardLinkManager(id_type=tuple, info_type=tuple)  # (dev, ino) -> (hlid, chunks)
+        self.hlm = HardLinkManager(id_type=tuple, info_type=(list, type(None)))  # (ino, dev) -> chunks or None
         self.stats = Statistics(output_json=log_json, iec=iec)  # threading: done by cache (including progress)
         self.cwd = os.getcwd()
         self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse)
@@ -1249,20 +1249,20 @@ class FilesystemObjectProcessors:
         update_map = False
         if hardlinked:
             status = 'h'  # hardlink
-            hlid, chunks = self.hlm.retrieve(id=(st.st_ino, st.st_dev), default=(None, None))
-            if hlid is None:
+            nothing = object()
+            chunks = self.hlm.retrieve(id=(st.st_ino, st.st_dev), default=nothing)
+            if chunks is nothing:
                 update_map = True
-                hlid = self.hlm.hardlink_id(item._dict['path'])
-            item.hlid = hlid
-            if chunks is not None:
+            elif chunks is not None:
                 item.chunks = chunks
+            item.hlid = self.hlm.hardlink_id_from_inode(ino=st.st_ino, dev=st.st_dev)
         yield item, status, hardlinked
         self.add_item(item, stats=self.stats)
         if update_map:
             # remember the hlid of this fs object and if the item has chunks,
             # also remember them, so we do not have to re-chunk a hardlink.
             chunks = item.chunks if 'chunks' in item else None
-            self.hlm.remember(id=(st.st_ino, st.st_dev), info=(hlid, chunks))
+            self.hlm.remember(id=(st.st_ino, st.st_dev), info=chunks)
 
     def process_dir_with_fd(self, *, path, fd, st):
         with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked):

+ 2 - 2
src/borg/archiver.py

@@ -353,10 +353,10 @@ class Archiver:
         def upgrade_item(item):
             """upgrade item as needed, get rid of legacy crap"""
             if hlm.borg1_hardlink_master(item):
-                item._dict['hlid'] = hlid = hlm.hardlink_id(item._dict['path'])
+                item._dict['hlid'] = hlid = hlm.hardlink_id_from_path(item._dict['path'])
                 hlm.remember(id=hlid, info=(item._dict.get('chunks'), item._dict.get('chunks_healthy')))
             elif hlm.borg1_hardlink_slave(item):
-                item._dict['hlid'] = hlid = hlm.hardlink_id(item._dict['source'])
+                item._dict['hlid'] = hlid = hlm.hardlink_id_from_path(item._dict['source'])
                 chunks, chunks_healthy = hlm.retrieve(id=hlid, default=(None, None))
                 if chunks is not None:
                     item._dict['chunks'] = chunks

+ 10 - 4
src/borg/helpers/fs.py

@@ -181,7 +181,7 @@ class HardLinkManager:
     C) When transferring from a borg1 archive, we need:
        path -> chunks, chunks_healthy  # for borg1_hl_targets
        If we encounter a regular file item with source == path later, we reuse chunks and chunks_healthy
-       and create the same hlid = hardlink_id(source).
+       and create the same hlid = hardlink_id_from_path(source).
 
     D) When importing a tar file (simplified 1-pass way for now, not creating borg hardlink items):
        path -> chunks
@@ -203,11 +203,17 @@ class HardLinkManager:
     def borg1_hardlink_slave(self, item):  # legacy
         return 'source' in item and self.borg1_hardlinkable(item.mode)
 
-    def hardlink_id(self, path):
+    def hardlink_id_from_path(self, path):
         """compute a hardlink id from a path"""
         assert isinstance(path, bytes)
         return hashlib.sha256(path).digest()
 
+    def hardlink_id_from_inode(self, *, ino, dev):
+        """compute a hardlink id from an inode number and a device number"""
+        assert isinstance(ino, int)
+        assert isinstance(dev, int)
+        return hashlib.sha256(f'{ino}/{dev}'.encode()).digest()
+
     def remember(self, *, id, info):
         """
         remember stuff from a (usually contentful) item.
@@ -220,8 +226,8 @@ class HardLinkManager:
                      chunks / chunks_healthy list
                      hlid
         """
-        assert isinstance(id, self.id_type)
-        assert isinstance(info, self.info_type)
+        assert isinstance(id, self.id_type), f"id is {id!r}, not of type {self.id_type}"
+        assert isinstance(info, self.info_type), f"info is {info!r}, not of type {self.info_type}"
         self._map[id] = info
 
     def retrieve(self, id, *, default=None):