Browse Source

Merge pull request #4455 from ThomasWaldmann/backports-17

Backports to 1.1-maint (17)
TW 6 years ago
parent
commit
0a20a03b7f

+ 23 - 0
docs/misc/logging.conf

@@ -0,0 +1,23 @@
+[loggers]
+keys=root
+
+[handlers]
+keys=logfile
+
+[formatters]
+keys=logfile
+
+[logger_root]
+level=NOTSET
+handlers=logfile
+
+[handler_logfile]
+class=FileHandler
+level=INFO
+formatter=logfile
+args=('borg.log', 'w')
+
+[formatter_logfile]
+format=%(asctime)s %(levelname)s %(message)s
+datefmt=
+class=logging.Formatter
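
For context: Python's standard library can consume such an INI-style file directly, and borg reads it when BORG_LOGGING_CONF points at it (see the next hunk). A minimal sketch, assuming the file above is saved as docs/misc/logging.conf relative to the working directory:

# Illustrative sketch, not part of the commit: load the INI-style logging
# configuration shown above with the standard library.
import logging
import logging.config

logging.config.fileConfig('docs/misc/logging.conf')
logging.getLogger(__name__).info('hello')  # written to borg.log per the config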

+ 1 - 0
docs/usage_general.rst.inc

@@ -202,6 +202,7 @@ General:
         use fqdn@uniqueid.
     BORG_LOGGING_CONF
         When set, use the given filename as INI_-style logging configuration.
+        A basic example conf can be found at ``docs/misc/logging.conf``.
     BORG_RSH
         When set, use this command instead of ``ssh``. This can be used to specify ssh options, such as
         a custom identity file ``ssh -i /path/to/private/key``. See ``man ssh`` for other options. Using

+ 5 - 1
src/borg/constants.py

@@ -10,7 +10,9 @@ REQUIRED_ITEM_KEYS = frozenset(['path', 'mtime', ])
 # this set must be kept complete, otherwise rebuild_manifest might malfunction:
 ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end',
                           'comment', 'chunker_params',
-                          'recreate_cmdline', 'recreate_source_id', 'recreate_args'])
+                          'recreate_cmdline',
+                          'recreate_source_id', 'recreate_args', 'recreate_partial_chunks',  # used in 1.1.0b1 .. b2
+                          ])
 
 # this is the set of keys that are always present in archives:
 REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ])
@@ -52,6 +54,8 @@ LIST_SCAN_LIMIT = 100000
 
 DEFAULT_SEGMENTS_PER_DIR = 1000
 
+FD_MAX_AGE = 4 * 60  # 4 minutes
+
 CHUNK_MIN_EXP = 19  # 2**19 == 512kiB
 CHUNK_MAX_EXP = 23  # 2**23 == 8MiB
 HASH_WINDOW_SIZE = 0xfff  # 4095B
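
Context for the ARCHIVE_KEYS hunk: the comment warns that the set must stay complete or rebuild_manifest might malfunction. A hedged sketch of the kind of plausibility check this completeness enables (looks_like_archive_meta is hypothetical, not borg's actual rebuild_manifest code):

# Hypothetical sketch: ARCHIVE_KEYS must list every key ever written,
# including the recreate_* keys used only in 1.1.0b1 .. b2; a rebuild
# that treats unknown keys as "not an archive" would otherwise skip
# archives created by those betas.
from borg.constants import ARCHIVE_KEYS, REQUIRED_ARCHIVE_KEYS

def looks_like_archive_meta(meta: dict) -> bool:
    keys = set(meta)
    return REQUIRED_ARCHIVE_KEYS.issubset(keys) and keys.issubset(ARCHIVE_KEYS)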

+ 1 - 1
src/borg/helpers.py

@@ -141,7 +141,7 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION or platform.API_VERSION != '1.1_04':
         raise ExtensionModuleError
-    if item.API_VERSION != '1.1_02':
+    if item.API_VERSION != '1.1_03':
         raise ExtensionModuleError
 
 
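The version bump above follows borg's cross-module pinning pattern: each compiled module exports an API_VERSION string, and check_extension_modules() compares it against the exact version the pure-Python code expects. A simplified sketch of the pattern (not the real helpers.py code):

# Simplified sketch: a mismatch between a compiled module's API_VERSION
# and the hardcoded expectation aborts early instead of failing subtly later.
from borg import item
from borg.helpers import ExtensionModuleError

def check_item_module():
    if item.API_VERSION != '1.1_03':  # must match src/borg/item.pyx
        raise ExtensionModuleError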

+ 5 - 7
src/borg/item.pyx

@@ -3,12 +3,12 @@
 import stat
 from collections import namedtuple
 
-from .constants import ITEM_KEYS
+from .constants import ITEM_KEYS, ARCHIVE_KEYS
 from .helpers import safe_encode, safe_decode
 from .helpers import bigint_to_int, int_to_bigint
 from .helpers import StableDict
 
-API_VERSION = '1.1_02'
+API_VERSION = '1.1_03'
 
 
 class PropDict:
@@ -292,10 +292,7 @@ class ArchiveItem(PropDict):
     If a ArchiveItem shall be serialized, give as_dict() method output to msgpack packer.
     """
 
-    VALID_KEYS = {'version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end',
-                  'comment', 'chunker_params',
-                  'recreate_cmdline', 'recreate_source_id', 'recreate_args', 'recreate_partial_chunks',
-                  }  # str-typed keys
+    VALID_KEYS = ARCHIVE_KEYS  # str-typed keys
 
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
@@ -309,8 +306,9 @@ class ArchiveItem(PropDict):
     time_end = PropDict._make_property('time_end', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     comment = PropDict._make_property('comment', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     chunker_params = PropDict._make_property('chunker_params', tuple)
-    recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
     recreate_cmdline = PropDict._make_property('recreate_cmdline', list)  # list of s-e-str
+    # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2
+    recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
     recreate_args = PropDict._make_property('recreate_args', list)  # list of s-e-str
     recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list)  # list of tuples
 
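For readers unfamiliar with PropDict: _make_property generates type-checked properties backed by the shared _dict. A rough, heavily simplified sketch of the mechanism (encode/decode hooks and error details omitted; the Cython source differs):

# Simplified sketch of the PropDict._make_property idea: each generated
# property reads/writes self._dict and enforces a value type on assignment.
class PropDict:
    __slots__ = ('_dict',)

    def __init__(self, **kw):
        self._dict = dict(kw)

    @staticmethod
    def _make_property(key, value_type):
        def _get(self):
            return self._dict[key]

        def _set(self, value):
            if not isinstance(value, value_type):
                raise TypeError('%s must be %s' % (key, value_type.__name__))
            self._dict[key] = value

        return property(_get, _set)

class ArchiveItem(PropDict):
    name = PropDict._make_property('name', str)
    chunker_params = PropDict._make_property('chunker_params', tuple)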

+ 6 - 1
src/borg/lrucache.py

@@ -39,12 +39,17 @@ class LRUCache:
         self._lru.append(key)
         return value
 
+    def upd(self, key, value):
+        # special use only: update the value for an existing key without having to dispose it first
+        # this method complements __setitem__ which should be used for the normal use case.
+        assert key in self._cache, "Unexpected attempt to update a non-existing item."
+        self._cache[key] = value
+
     def clear(self):
         for value in self._cache.values():
             self._dispose(value)
         self._cache.clear()
 
-    # useful for testing
     def items(self):
         return self._cache.items()
 
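A short usage sketch for the new upd() method (assumed semantics, per its comment): __setitem__ inserts new keys, while upd() replaces the value of an already-cached key without routing anything through the dispose callback:

# Usage sketch: upd() swaps the stored value in place; dispose is not called.
from borg.lrucache import LRUCache

disposed = []
cache = LRUCache(capacity=4, dispose=disposed.append)
cache['seg'] = (100.0, 'fd')       # normal insert via __setitem__
cache.upd('seg', (200.0, 'fd'))    # refresh a live entry, e.g. a new timestamp
assert cache['seg'] == (200.0, 'fd')
assert disposed == []              # upd() never disposed the old value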

+ 33 - 7
src/borg/repository.py

@@ -3,6 +3,7 @@ import mmap
 import os
 import shutil
 import struct
+import time
 from binascii import hexlify, unhexlify
 from collections import defaultdict
 from configparser import ConfigParser
@@ -1164,20 +1165,21 @@ class LoggedIO:
 
     def __init__(self, path, limit, segments_per_dir, capacity=90):
         self.path = path
-        self.fds = LRUCache(capacity,
-                            dispose=self.close_fd)
+        self.fds = LRUCache(capacity, dispose=self._close_fd)
         self.segment = 0
         self.limit = limit
         self.segments_per_dir = segments_per_dir
         self.offset = 0
         self._write_fd = None
+        self._fds_cleaned = 0
 
     def close(self):
         self.close_segment()
         self.fds.clear()
         self.fds = None  # Just to make sure we're disabled
 
-    def close_fd(self, fd):
+    def _close_fd(self, ts_fd):
+        ts, fd = ts_fd
         safe_fadvise(fd.fileno(), 0, 0, 'DONTNEED')
         fd.close()
 
@@ -1291,13 +1293,37 @@ class LoggedIO:
         return self._write_fd
 
     def get_fd(self, segment):
-        try:
-            return self.fds[segment]
-        except KeyError:
+        # note: get_fd() returns a fd with undefined file pointer position,
+        # so callers must always seek() to desired position afterwards.
+        now = time.monotonic()
+
+        def open_fd():
             fd = open(self.segment_filename(segment), 'rb')
-            self.fds[segment] = fd
+            self.fds[segment] = (now, fd)
             return fd
 
+        def clean_old():
+            # we regularly get rid of all old FDs here:
+            if now - self._fds_cleaned > FD_MAX_AGE // 8:
+                self._fds_cleaned = now
+                for k, ts_fd in list(self.fds.items()):
+                    ts, fd = ts_fd
+                    if now - ts > FD_MAX_AGE:
+                        # we do not want to touch long-unused file handles to
+                        # avoid ESTALE issues (e.g. on network filesystems).
+                        del self.fds[k]
+
+        clean_old()
+        try:
+            ts, fd = self.fds[segment]
+        except KeyError:
+            fd = open_fd()
+        else:
+            # we only have fresh enough stuff here.
+            # update the timestamp of the lru cache entry.
+            self.fds.upd(segment, (now, fd))
+        return fd
+
     def close_segment(self):
         # set self._write_fd to None early to guard against reentry from error handling code paths:
         fd, self._write_fd = self._write_fd, None
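
Taken together, the repository.py changes implement fd aging: cached segment file descriptors are tagged with a monotonic timestamp, re-stamped on every cache hit, and swept once they exceed FD_MAX_AGE, so long-unused handles are never touched again. A self-contained sketch of the same technique with simplified names (FdCache is hypothetical, not borg code):

# Self-contained sketch of the fd-aging technique; FD_MAX_AGE as added
# in src/borg/constants.py above.
import time

FD_MAX_AGE = 4 * 60  # 4 minutes

class FdCache:
    def __init__(self, open_func):
        self._open = open_func   # e.g. lambda seg: open(filename(seg), 'rb')
        self._fds = {}           # segment -> (timestamp, fd)
        self._cleaned = 0

    def get_fd(self, segment):
        # as in the diff: the returned fd has an undefined file position,
        # so callers must seek() before reading
        now = time.monotonic()
        if now - self._cleaned > FD_MAX_AGE // 8:
            # sweep at most every FD_MAX_AGE // 8 seconds
            self._cleaned = now
            for seg, (ts, fd) in list(self._fds.items()):
                if now - ts > FD_MAX_AGE:
                    # long-unused handle: close and forget it rather than
                    # risk ESTALE later (e.g. on network filesystems)
                    del self._fds[seg]
                    fd.close()
        try:
            ts, fd = self._fds[segment]
        except KeyError:
            fd = self._open(segment)
        self._fds[segment] = (now, fd)  # insert, or re-stamp on a cache hit
        return fd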
         fd, self._write_fd = self._write_fd, None