Browse Source

redo stale lock handling, fixes #3986

drop BORG_HOSTNAME_IS_UNIQUE (please use BORG_HOST_ID if needed)

borg now always assumes it has a unique hostid - either automatically
from fqdn plus uuid.getnode() or overridden via BORG_HOST_ID.
Thomas Waldmann 6 years ago
parent
commit
7ad5290501

+ 7 - 0
docs/changes.rst

@@ -205,6 +205,13 @@ Compatibility notes:
   - option "--list-format" (2017-10), use "--format"
   - option "--list-format" (2017-10), use "--format"
   - option "--ignore-inode" (2017-09), use "--files-cache" w/o "inode"
   - option "--ignore-inode" (2017-09), use "--files-cache" w/o "inode"
   - option "--no-files-cache" (2017-09), use "--files-cache=disabled"
   - option "--no-files-cache" (2017-09), use "--files-cache=disabled"
+- removed BORG_HOSTNAME_IS_UNIQUE env var.
+  to use borg you must implement one of these 2 scenarios:
+  - 1) the combination of FQDN and result of uuid.getnode() must be unique
+       and stable (this should be the case for almost everybody, except when
+       having duplicate FQDN *and* MAC address or all-zero MAC address)
+  - 2) if you are aware that 1) is not the case for you, you must set
+       BORG_HOST_ID env var to something unique.
 
 
 Fixes:
 Fixes:
 
 

+ 1 - 1
docs/usage/serve.rst

@@ -12,7 +12,7 @@ that it is also ``borg serve`` and enforce path restriction(s) as given by the
 forced command. That way, other options given by the client (like ``--info`` or
 forced command. That way, other options given by the client (like ``--info`` or
 ``--umask``) are preserved (and are not fixed by the forced command).
 ``--umask``) are preserved (and are not fixed by the forced command).
 
 
-Environment variables (such as BORG_HOSTNAME_IS_UNIQUE) contained in the original
+Environment variables (such as BORG_XXX) contained in the original
 command sent by the client are *not* interpreted, but ignored. If BORG_XXX environment
 command sent by the client are *not* interpreted, but ignored. If BORG_XXX environment
 variables should be set on the ``borg serve`` side, then these must be set in system-specific
 variables should be set on the ``borg serve`` side, then these must be set in system-specific
 locations like ``/etc/environment`` or in the forced command itself (example below).
 locations like ``/etc/environment`` or in the forced command itself (example below).

+ 2 - 3
src/borg/cache.py

@@ -19,7 +19,7 @@ from .helpers import get_cache_dir, get_security_dir
 from .helpers import int_to_bigint, bigint_to_int, bin_to_hex, parse_stringified_list
 from .helpers import int_to_bigint, bigint_to_int, bin_to_hex, parse_stringified_list
 from .helpers import format_file_size
 from .helpers import format_file_size
 from .helpers import safe_ns
 from .helpers import safe_ns
-from .helpers import yes, hostname_is_unique
+from .helpers import yes
 from .helpers import remove_surrogates
 from .helpers import remove_surrogates
 from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
 from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
 from .helpers import set_ec, EXIT_WARNING
 from .helpers import set_ec, EXIT_WARNING
@@ -256,8 +256,7 @@ class CacheConfig:
             config.write(fd)
             config.write(fd)
 
 
     def open(self):
     def open(self):
-        self.lock = Lock(os.path.join(self.path, 'lock'), exclusive=True, timeout=self.lock_wait,
-                         kill_stale_locks=hostname_is_unique()).acquire()
+        self.lock = Lock(os.path.join(self.path, 'lock'), exclusive=True, timeout=self.lock_wait).acquire()
         self.load()
         self.load()
 
 
     def load(self):
     def load(self):

+ 0 - 4
src/borg/helpers/yes.py

@@ -102,7 +102,3 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None,
             output(retry_msg, 'prompt_retry', is_prompt=True)
             output(retry_msg, 'prompt_retry', is_prompt=True)
         # in case we used an environment variable and it gave an invalid answer, do not use it again:
         # in case we used an environment variable and it gave an invalid answer, do not use it again:
         env_var_override = None
         env_var_override = None
-
-
-def hostname_is_unique():
-    return yes(env_var_override='BORG_HOSTNAME_IS_UNIQUE', prompt=False, env_msg=None, default=True)

+ 8 - 8
src/borg/locking.py

@@ -101,13 +101,13 @@ class ExclusiveLock:
     This makes sure the lock is released again if the block is left, no
     This makes sure the lock is released again if the block is left, no
     matter how (e.g. if an exception occurred).
     matter how (e.g. if an exception occurred).
     """
     """
-    def __init__(self, path, timeout=None, sleep=None, id=None, kill_stale_locks=False):
+    def __init__(self, path, timeout=None, sleep=None, id=None):
         self.timeout = timeout
         self.timeout = timeout
         self.sleep = sleep
         self.sleep = sleep
         self.path = os.path.abspath(path)
         self.path = os.path.abspath(path)
         self.id = id or platform.get_process_id()
         self.id = id or platform.get_process_id()
         self.unique_name = os.path.join(self.path, "%s.%d-%x" % self.id)
         self.unique_name = os.path.join(self.path, "%s.%d-%x" % self.id)
-        self.kill_stale_locks = kill_stale_locks
+        self.kill_stale_locks = True
         self.stale_warning_printed = False
         self.stale_warning_printed = False
 
 
     def __enter__(self):
     def __enter__(self):
@@ -173,7 +173,7 @@ class ExclusiveLock:
             if not self.kill_stale_locks:
             if not self.kill_stale_locks:
                 if not self.stale_warning_printed:
                 if not self.stale_warning_printed:
                     # Log this at warning level to hint the user at the ability
                     # Log this at warning level to hint the user at the ability
-                    logger.warning("Found stale lock %s, but not deleting because BORG_HOSTNAME_IS_UNIQUE is False.", name)
+                    logger.warning("Found stale lock %s, but not deleting because self.kill_stale_locks = False.", name)
                     self.stale_warning_printed = True
                     self.stale_warning_printed = True
                 return False
                 return False
 
 
@@ -223,10 +223,10 @@ class LockRoster:
     Note: you usually should call the methods with an exclusive lock held,
     Note: you usually should call the methods with an exclusive lock held,
     to avoid conflicting access by multiple threads/processes/machines.
     to avoid conflicting access by multiple threads/processes/machines.
     """
     """
-    def __init__(self, path, id=None, kill_stale_locks=False):
+    def __init__(self, path, id=None):
         self.path = path
         self.path = path
         self.id = id or platform.get_process_id()
         self.id = id or platform.get_process_id()
-        self.kill_stale_locks = kill_stale_locks
+        self.kill_stale_locks = True
 
 
     def load(self):
     def load(self):
         try:
         try:
@@ -320,18 +320,18 @@ class Lock:
     This makes sure the lock is released again if the block is left, no
     This makes sure the lock is released again if the block is left, no
     matter how (e.g. if an exception occurred).
     matter how (e.g. if an exception occurred).
     """
     """
-    def __init__(self, path, exclusive=False, sleep=None, timeout=None, id=None, kill_stale_locks=False):
+    def __init__(self, path, exclusive=False, sleep=None, timeout=None, id=None):
         self.path = path
         self.path = path
         self.is_exclusive = exclusive
         self.is_exclusive = exclusive
         self.sleep = sleep
         self.sleep = sleep
         self.timeout = timeout
         self.timeout = timeout
         self.id = id or platform.get_process_id()
         self.id = id or platform.get_process_id()
         # globally keeping track of shared and exclusive lockers:
         # globally keeping track of shared and exclusive lockers:
-        self._roster = LockRoster(path + '.roster', id=id, kill_stale_locks=kill_stale_locks)
+        self._roster = LockRoster(path + '.roster', id=id)
         # an exclusive lock, used for:
         # an exclusive lock, used for:
         # - holding while doing roster queries / updates
         # - holding while doing roster queries / updates
         # - holding while the Lock itself is exclusive
         # - holding while the Lock itself is exclusive
-        self._lock = ExclusiveLock(path + '.exclusive', id=id, timeout=timeout, kill_stale_locks=kill_stale_locks)
+        self._lock = ExclusiveLock(path + '.exclusive', id=id, timeout=timeout)
 
 
     def __enter__(self):
     def __enter__(self):
         return self.acquire()
         return self.acquire()

+ 0 - 3
src/borg/remote.py

@@ -22,7 +22,6 @@ from .helpers import Error, IntegrityError
 from .helpers import bin_to_hex
 from .helpers import bin_to_hex
 from .helpers import get_base_dir
 from .helpers import get_base_dir
 from .helpers import get_limited_unpacker
 from .helpers import get_limited_unpacker
-from .helpers import hostname_is_unique
 from .helpers import replace_placeholders
 from .helpers import replace_placeholders
 from .helpers import sysinfo
 from .helpers import sysinfo
 from .helpers import format_file_size
 from .helpers import format_file_size
@@ -679,8 +678,6 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
             if 'storage_quota' in args and args.storage_quota:
             if 'storage_quota' in args and args.storage_quota:
                 opts.append('--storage-quota=%s' % args.storage_quota)
                 opts.append('--storage-quota=%s' % args.storage_quota)
         env_vars = []
         env_vars = []
-        if not hostname_is_unique():
-            env_vars.append('BORG_HOSTNAME_IS_UNIQUE=no')
         if testing:
         if testing:
             return env_vars + [sys.executable, '-m', 'borg.archiver', 'serve'] + opts + self.extra_test_args
             return env_vars + [sys.executable, '-m', 'borg.archiver', 'serve'] + opts + self.extra_test_args
         else:  # pragma: no cover
         else:  # pragma: no cover

+ 1 - 2
src/borg/repository.py

@@ -17,7 +17,6 @@ from .helpers import Error, ErrorWithTraceback, IntegrityError, format_file_size
 from .helpers import Location
 from .helpers import Location
 from .helpers import ProgressIndicatorPercent
 from .helpers import ProgressIndicatorPercent
 from .helpers import bin_to_hex
 from .helpers import bin_to_hex
-from .helpers import hostname_is_unique
 from .helpers import secure_erase, truncate_and_unlink
 from .helpers import secure_erase, truncate_and_unlink
 from .helpers import Manifest
 from .helpers import Manifest
 from .helpers import msgpack
 from .helpers import msgpack
@@ -390,7 +389,7 @@ class Repository:
         if not os.path.isdir(path):
         if not os.path.isdir(path):
             raise self.DoesNotExist(path)
             raise self.DoesNotExist(path)
         if lock:
         if lock:
-            self.lock = Lock(os.path.join(path, 'lock'), exclusive, timeout=lock_wait, kill_stale_locks=hostname_is_unique()).acquire()
+            self.lock = Lock(os.path.join(path, 'lock'), exclusive, timeout=lock_wait).acquire()
         else:
         else:
             self.lock = None
             self.lock = None
         self.config = ConfigParser(interpolation=None)
         self.config = ConfigParser(interpolation=None)

+ 1 - 1
src/borg/testsuite/archiver.py

@@ -3640,7 +3640,7 @@ def test_get_args():
     # were not forced, environment variables would be interpreted by the shell, but this does not
     # were not forced, environment variables would be interpreted by the shell, but this does not
     # happen for forced commands - we get the verbatim command line and need to deal with env vars.
     # happen for forced commands - we get the verbatim command line and need to deal with env vars.
     args = archiver.get_args(['borg', 'serve', ],
     args = archiver.get_args(['borg', 'serve', ],
-                             'BORG_HOSTNAME_IS_UNIQUE=yes borg serve --info')
+                             'BORG_FOO=bar borg serve --info')
     assert args.func == archiver.do_serve
     assert args.func == archiver.do_serve
 
 
 
 

+ 12 - 8
src/borg/testsuite/locking.py

@@ -67,7 +67,7 @@ class TestExclusiveLock:
         cant_know_if_dead_id = 'foo.bar.example.net', 1, 2
         cant_know_if_dead_id = 'foo.bar.example.net', 1, 2
 
 
         dead_lock = ExclusiveLock(lockpath, id=dead_id).acquire()
         dead_lock = ExclusiveLock(lockpath, id=dead_id).acquire()
-        with ExclusiveLock(lockpath, id=our_id, kill_stale_locks=True):
+        with ExclusiveLock(lockpath, id=our_id):
             with pytest.raises(NotMyLock):
             with pytest.raises(NotMyLock):
                 dead_lock.release()
                 dead_lock.release()
         with pytest.raises(NotLocked):
         with pytest.raises(NotLocked):
@@ -75,7 +75,7 @@ class TestExclusiveLock:
 
 
         with ExclusiveLock(lockpath, id=cant_know_if_dead_id):
         with ExclusiveLock(lockpath, id=cant_know_if_dead_id):
             with pytest.raises(LockTimeout):
             with pytest.raises(LockTimeout):
-                ExclusiveLock(lockpath, id=our_id, kill_stale_locks=True, timeout=0.1).acquire()
+                ExclusiveLock(lockpath, id=our_id, timeout=0.1).acquire()
 
 
     def test_migrate_lock(self, lockpath):
     def test_migrate_lock(self, lockpath):
         old_id, new_id = ID1, ID2
         old_id, new_id = ID1, ID2
@@ -157,7 +157,7 @@ class TestLock:
 
 
         dead_lock = Lock(lockpath, id=dead_id, exclusive=True).acquire()
         dead_lock = Lock(lockpath, id=dead_id, exclusive=True).acquire()
         roster = dead_lock._roster
         roster = dead_lock._roster
-        with Lock(lockpath, id=our_id, kill_stale_locks=True):
+        with Lock(lockpath, id=our_id):
             assert roster.get(EXCLUSIVE) == set()
             assert roster.get(EXCLUSIVE) == set()
             assert roster.get(SHARED) == {our_id}
             assert roster.get(SHARED) == {our_id}
         assert roster.get(EXCLUSIVE) == set()
         assert roster.get(EXCLUSIVE) == set()
@@ -167,7 +167,7 @@ class TestLock:
 
 
         with Lock(lockpath, id=cant_know_if_dead_id, exclusive=True):
         with Lock(lockpath, id=cant_know_if_dead_id, exclusive=True):
             with pytest.raises(LockTimeout):
             with pytest.raises(LockTimeout):
-                Lock(lockpath, id=our_id, kill_stale_locks=True, timeout=0.1).acquire()
+                Lock(lockpath, id=our_id, timeout=0.1).acquire()
 
 
     def test_migrate_lock(self, lockpath):
     def test_migrate_lock(self, lockpath):
         old_id, new_id = ID1, ID2
         old_id, new_id = ID1, ID2
@@ -217,25 +217,29 @@ class TestLockRoster:
         host, pid, tid = our_id = get_process_id()
         host, pid, tid = our_id = get_process_id()
         dead_id = host, free_pid, tid
         dead_id = host, free_pid, tid
 
 
+        # put a dead local process lock into roster
         roster1 = LockRoster(rosterpath, id=dead_id)
         roster1 = LockRoster(rosterpath, id=dead_id)
+        roster1.kill_stale_locks = False
         assert roster1.get(SHARED) == set()
         assert roster1.get(SHARED) == set()
         roster1.modify(SHARED, ADD)
         roster1.modify(SHARED, ADD)
         assert roster1.get(SHARED) == {dead_id}
         assert roster1.get(SHARED) == {dead_id}
 
 
+        # put a unknown-state remote process lock into roster
         cant_know_if_dead_id = 'foo.bar.example.net', 1, 2
         cant_know_if_dead_id = 'foo.bar.example.net', 1, 2
         roster1 = LockRoster(rosterpath, id=cant_know_if_dead_id)
         roster1 = LockRoster(rosterpath, id=cant_know_if_dead_id)
+        roster1.kill_stale_locks = False
         assert roster1.get(SHARED) == {dead_id}
         assert roster1.get(SHARED) == {dead_id}
         roster1.modify(SHARED, ADD)
         roster1.modify(SHARED, ADD)
         assert roster1.get(SHARED) == {dead_id, cant_know_if_dead_id}
         assert roster1.get(SHARED) == {dead_id, cant_know_if_dead_id}
 
 
-        killer_roster = LockRoster(rosterpath, kill_stale_locks=True)
-        # Did kill the dead processes lock (which was alive ... I guess?!)
+        killer_roster = LockRoster(rosterpath)
+        # Active kill_stale_locks here - does it kill the dead_id lock?
         assert killer_roster.get(SHARED) == {cant_know_if_dead_id}
         assert killer_roster.get(SHARED) == {cant_know_if_dead_id}
         killer_roster.modify(SHARED, ADD)
         killer_roster.modify(SHARED, ADD)
         assert killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id}
         assert killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id}
 
 
-        other_killer_roster = LockRoster(rosterpath, kill_stale_locks=True)
-        # Did not kill us, since we're alive
+        other_killer_roster = LockRoster(rosterpath)
+        # Active kill_stale_locks here - must not kill our_id lock since we're alive.
         assert other_killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id}
         assert other_killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id}
 
 
     def test_migrate_lock(self, rosterpath):
     def test_migrate_lock(self, rosterpath):