|
@@ -1,24 +1,15 @@
|
|
|
import json
|
|
|
import os
|
|
|
-import socket
|
|
|
import time
|
|
|
|
|
|
+from . import platform
|
|
|
from .helpers import Error, ErrorWithTraceback
|
|
|
+from .logger import create_logger
|
|
|
|
|
|
ADD, REMOVE = 'add', 'remove'
|
|
|
SHARED, EXCLUSIVE = 'shared', 'exclusive'
|
|
|
|
|
|
-# only determine the PID and hostname once.
|
|
|
-# for FUSE mounts, we fork a child process that needs to release
|
|
|
-# the lock made by the parent, so it needs to use the same PID for that.
|
|
|
-_pid = os.getpid()
|
|
|
-_hostname = socket.gethostname()
|
|
|
-
|
|
|
-
|
|
|
-def get_id():
|
|
|
- """Get identification tuple for 'us'"""
|
|
|
- thread_id = 0
|
|
|
- return _hostname, _pid, thread_id
|
|
|
+logger = create_logger(__name__)
|
|
|
|
|
|
|
|
|
class TimeoutTimer:
|
|
@@ -109,12 +100,14 @@ class ExclusiveLock:
|
|
|
This makes sure the lock is released again if the block is left, no
|
|
|
matter how (e.g. if an exception occurred).
|
|
|
"""
|
|
|
- def __init__(self, path, timeout=None, sleep=None, id=None):
|
|
|
+ def __init__(self, path, timeout=None, sleep=None, id=None, kill_stale_locks=False):
|
|
|
self.timeout = timeout
|
|
|
self.sleep = sleep
|
|
|
self.path = os.path.abspath(path)
|
|
|
- self.id = id or get_id()
|
|
|
+ self.id = id or platform.get_process_id()
|
|
|
self.unique_name = os.path.join(self.path, "%s.%d-%x" % self.id)
|
|
|
+ self.kill_stale_locks = kill_stale_locks
|
|
|
+ self.stale_warning_printed = False
|
|
|
|
|
|
def __enter__(self):
|
|
|
return self.acquire()
|
|
@@ -137,6 +130,7 @@ class ExclusiveLock:
|
|
|
except FileExistsError: # already locked
|
|
|
if self.by_me():
|
|
|
return self
|
|
|
+ self.kill_stale_lock()
|
|
|
if timer.timed_out_or_sleep():
|
|
|
raise LockTimeout(self.path)
|
|
|
except OSError as err:
|
|
@@ -160,6 +154,48 @@ class ExclusiveLock:
|
|
|
def by_me(self):
|
|
|
return os.path.exists(self.unique_name)
|
|
|
|
|
|
+ def kill_stale_lock(self):
|
|
|
+ for name in os.listdir(self.path):
|
|
|
+ try:
|
|
|
+ host_pid, thread_str = name.rsplit('-', 1)
|
|
|
+ host, pid_str = host_pid.rsplit('.', 1)
|
|
|
+ pid = int(pid_str)
|
|
|
+ thread = int(thread_str)
|
|
|
+ except ValueError:
|
|
|
+ # Malformed lock name? Or just some new format we don't understand?
|
|
|
+ # It's safer to just exit.
|
|
|
+ return False
|
|
|
+
|
|
|
+ if platform.process_alive(host, pid, thread):
|
|
|
+ return False
|
|
|
+
|
|
|
+ if not self.kill_stale_locks:
|
|
|
+ if not self.stale_warning_printed:
|
|
|
+ # Log this at warning level to hint the user at the ability
|
|
|
+ logger.warning("Found stale lock %s, but not deleting because BORG_HOSTNAME_IS_UNIQUE is not set.", name)
|
|
|
+ self.stale_warning_printed = True
|
|
|
+ return False
|
|
|
+
|
|
|
+ try:
|
|
|
+ os.unlink(os.path.join(self.path, name))
|
|
|
+ logger.warning('Killed stale lock %s.', name)
|
|
|
+ except OSError as err:
|
|
|
+ if not self.stale_warning_printed:
|
|
|
+ # This error will bubble up and likely result in locking failure
|
|
|
+ logger.error('Found stale lock %s, but cannot delete due to %s', name, str(err))
|
|
|
+ self.stale_warning_printed = True
|
|
|
+ return False
|
|
|
+
|
|
|
+ try:
|
|
|
+ os.rmdir(self.path)
|
|
|
+ except OSError:
|
|
|
+ # Directory is not empty = we lost the race to somebody else
|
|
|
+ # Permission denied = we cannot operate anyway
|
|
|
+ # other error like EIO = we cannot operate and it's unsafe too.
|
|
|
+ return False
|
|
|
+
|
|
|
+ return True
|
|
|
+
|
|
|
def break_lock(self):
|
|
|
if self.is_locked():
|
|
|
for name in os.listdir(self.path):
|
|
@@ -174,14 +210,30 @@ class LockRoster:
|
|
|
Note: you usually should call the methods with an exclusive lock held,
|
|
|
to avoid conflicting access by multiple threads/processes/machines.
|
|
|
"""
|
|
|
- def __init__(self, path, id=None):
|
|
|
+ def __init__(self, path, id=None, kill_stale_locks=False):
|
|
|
self.path = path
|
|
|
- self.id = id or get_id()
|
|
|
+ self.id = id or platform.get_process_id()
|
|
|
+ self.kill_stale_locks = kill_stale_locks
|
|
|
|
|
|
def load(self):
|
|
|
try:
|
|
|
with open(self.path) as f:
|
|
|
data = json.load(f)
|
|
|
+
|
|
|
+ # Just nuke the stale locks early on load
|
|
|
+ if self.kill_stale_locks:
|
|
|
+ for key in (SHARED, EXCLUSIVE):
|
|
|
+ try:
|
|
|
+ entries = data[key]
|
|
|
+ except KeyError:
|
|
|
+ continue
|
|
|
+ elements = set()
|
|
|
+ for host, pid, thread in entries:
|
|
|
+ if platform.process_alive(host, pid, thread):
|
|
|
+ elements.add((host, pid, thread))
|
|
|
+ else:
|
|
|
+ logger.warning('Removed stale %s roster lock for pid %d.', key, pid)
|
|
|
+ data[key] = list(elements)
|
|
|
except (FileNotFoundError, ValueError):
|
|
|
# no or corrupt/empty roster file?
|
|
|
data = {}
|
|
@@ -235,18 +287,18 @@ class Lock:
|
|
|
This makes sure the lock is released again if the block is left, no
|
|
|
matter how (e.g. if an exception occurred).
|
|
|
"""
|
|
|
- def __init__(self, path, exclusive=False, sleep=None, timeout=None, id=None):
|
|
|
+ def __init__(self, path, exclusive=False, sleep=None, timeout=None, id=None, kill_stale_locks=False):
|
|
|
self.path = path
|
|
|
self.is_exclusive = exclusive
|
|
|
self.sleep = sleep
|
|
|
self.timeout = timeout
|
|
|
- self.id = id or get_id()
|
|
|
+ self.id = id or platform.get_process_id()
|
|
|
# globally keeping track of shared and exclusive lockers:
|
|
|
- self._roster = LockRoster(path + '.roster', id=id)
|
|
|
+ self._roster = LockRoster(path + '.roster', id=id, kill_stale_locks=kill_stale_locks)
|
|
|
# an exclusive lock, used for:
|
|
|
# - holding while doing roster queries / updates
|
|
|
- # - holding while the Lock instance itself is exclusive
|
|
|
- self._lock = ExclusiveLock(path + '.exclusive', id=id, timeout=timeout)
|
|
|
+ # - holding while the Lock itself is exclusive
|
|
|
+ self._lock = ExclusiveLock(path + '.exclusive', id=id, timeout=timeout, kill_stale_locks=kill_stale_locks)
|
|
|
|
|
|
def __enter__(self):
|
|
|
return self.acquire()
|