"""Miscellaneous helpers shared by the archiver: file locking, manifest
handling, prune selection, include/exclude patterns, size/time formatting,
uid/gid lookups and small platform-compatibility shims.
"""
import argparse
import binascii
import fcntl
import grp
import os
import pwd
import re
import sys
import time
from datetime import datetime, timezone, timedelta
from fnmatch import translate
from operator import attrgetter

import msgpack

import attic.hashindex
import attic.chunker
import attic.crypto


class Error(Exception):
    """Error base class"""

    # Exit code the command-line frontend should use when this error escapes.
    exit_code = 1

    def get_message(self):
        # The subclass docstring doubles as a str.format() message template,
        # filled with the exception's positional args.
        return 'Error: ' + type(self).__doc__.format(*self.args)


class ExtensionModuleError(Error):
    """The Borg binary extension modules do not seem to be properly installed"""


class UpgradableLock:
    """Advisory fcntl file lock that can be taken shared (read) or exclusive
    (write) and later upgraded from shared to exclusive.
    """

    class ReadLockFailed(Error):
        """Failed to acquire read lock on {}"""

    class WriteLockFailed(Error):
        """Failed to acquire write lock on {}"""

    def __init__(self, path, exclusive=False):
        self.path = path
        try:
            self.fd = open(path, 'r+')
        except IOError:
            # Read-only fallback: a shared lock can still be taken on it.
            self.fd = open(path, 'r')
        try:
            if exclusive:
                fcntl.lockf(self.fd, fcntl.LOCK_EX)
            else:
                fcntl.lockf(self.fd, fcntl.LOCK_SH)
        # Python 3.2 raises IOError, Python 3.3+ raises OSError
        except (IOError, OSError):
            if exclusive:
                raise self.WriteLockFailed(self.path)
            else:
                raise self.ReadLockFailed(self.path)
        self.is_exclusive = exclusive

    def upgrade(self):
        """Upgrade a shared lock to an exclusive one (blocks until granted)."""
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_EX)
        # Python 3.2 raises IOError, Python 3.3+ raises OSError
        except (IOError, OSError):
            raise self.WriteLockFailed(self.path)
        self.is_exclusive = True

    def release(self):
        """Drop the lock and close the underlying file descriptor."""
        fcntl.lockf(self.fd, fcntl.LOCK_UN)
        self.fd.close()


def check_extension_modules():
    """Raise ExtensionModuleError unless every compiled extension module
    reports the API version this code was built against.
    """
    import attic.platform
    if (attic.hashindex.API_VERSION != 2 or
        attic.chunker.API_VERSION != 2 or
        attic.crypto.API_VERSION != 2 or
        attic.platform.API_VERSION != 2):
        raise ExtensionModuleError


class Manifest:
    """Repository manifest: archive-name -> metadata mapping plus repository
    configuration, stored encrypted under a well-known all-zero id.
    """

    MANIFEST_ID = b'\0' * 32

    def __init__(self, key, repository):
        self.archives = {}
        self.config = {}
        self.key = key
        self.repository = repository

    @classmethod
    def load(cls, repository, key=None):
        """Fetch, decrypt and parse the manifest from *repository*.

        Returns a ``(manifest, key)`` tuple; when *key* is not supplied it is
        auto-detected from the encrypted manifest data.
        Raises ValueError on an unsupported manifest version.
        """
        from .key import key_factory
        cdata = repository.get(cls.MANIFEST_ID)
        if not key:
            key = key_factory(repository, cdata)
        manifest = cls(key, repository)
        data = key.decrypt(None, cdata)
        manifest.id = key.id_hash(data)
        m = msgpack.unpackb(data)
        if m.get(b'version') != 1:
            raise ValueError('Invalid manifest version')
        manifest.archives = dict((k.decode('utf-8'), v) for k, v in m[b'archives'].items())
        manifest.timestamp = m.get(b'timestamp')
        if manifest.timestamp:
            manifest.timestamp = manifest.timestamp.decode('ascii')
        manifest.config = m[b'config']
        return manifest, key

    def write(self):
        """Serialize, encrypt and store the manifest back to the repository."""
        self.timestamp = datetime.utcnow().isoformat()
        # StableDict gives deterministic serialization (stable id_hash).
        data = msgpack.packb(StableDict({
            'version': 1,
            'archives': self.archives,
            'timestamp': self.timestamp,
            'config': self.config,
        }))
        self.id = self.key.id_hash(data)
        self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))


def prune_within(archives, within):
    """Return the archives whose timestamp falls within the last *within*
    interval, given as a number plus an H/d/w/m/y suffix (e.g. '2d').

    Raises argparse.ArgumentTypeError on an unparsable or non-positive value.
    """
    multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365}
    try:
        hours = int(within[:-1]) * multiplier[within[-1]]
    except (KeyError, ValueError):
        # I don't like how this displays the original exception too:
        raise argparse.ArgumentTypeError('Unable to parse --within option: "%s"' % within)
    if hours <= 0:
        raise argparse.ArgumentTypeError('Number specified using --within option must be positive')
    target = datetime.now(timezone.utc) - timedelta(seconds=hours * 60 * 60)
    return [a for a in archives if a.ts > target]


def prune_split(archives, pattern, n, skip=None):
    """Keep the newest archive of each strftime(*pattern*) period, at most
    *n* archives total, ignoring archives already listed in *skip*.
    """
    if skip is None:  # avoid the mutable-default-argument pitfall
        skip = []
    last = None
    keep = []
    if n == 0:
        return keep
    for a in sorted(archives, key=attrgetter('ts'), reverse=True):
        period = to_localtime(a.ts).strftime(pattern)
        if period != last:
            last = period
            if a not in skip:
                keep.append(a)
                if len(keep) == n:
                    break
    return keep


class Statistics:
    """Running totals of original / compressed / deduplicated sizes."""

    def __init__(self):
        self.osize = self.csize = self.usize = self.nfiles = 0

    def update(self, size, csize, unique):
        """Account one chunk; *unique* chunks also count toward usize."""
        self.osize += size
        self.csize += csize
        if unique:
            self.usize += csize

    def print_(self, label, cache):
        """Print this archive's totals plus repository-wide totals."""
        total_size, total_csize, unique_size, unique_csize = cache.chunks.summarize()
        print()
        print('                       Original size      Compressed size    Deduplicated size')
        print('%-15s %20s %20s %20s' % (label, format_file_size(self.osize), format_file_size(self.csize), format_file_size(self.usize)))
        print('All archives:   %20s %20s %20s' % (format_file_size(total_size), format_file_size(total_csize), format_file_size(unique_csize)))

    def show_progress(self, item=None, final=False):
        """Print a single-line progress indicator, overwritten in place
        via '\\r'; *final* blanks the line.
        """
        if not final:
            path = remove_surrogates(item[b'path']) if item else ''
            if len(path) > 43:
                # Keep head and tail of over-long paths.
                path = '%s...%s' % (path[:20], path[-20:])
            msg = '%9s O %9s C %9s D %-43s' % (
                format_file_size(self.osize), format_file_size(self.csize), format_file_size(self.usize), path)
        else:
            msg = ' ' * 79
        print(msg, end='\r')
        sys.stdout.flush()


def get_keys_dir():
    """Determine where to store repository keys"""
    return os.environ.get('BORG_KEYS_DIR',
                          os.path.join(os.path.expanduser('~'), '.borg', 'keys'))


def get_cache_dir():
    """Determine where to store the repository cache"""
    return os.environ.get('BORG_CACHE_DIR',
                          os.path.join(os.path.expanduser('~'), '.cache', 'borg'))


def to_localtime(ts):
    """Convert datetime object from UTC to local time zone"""
    return datetime(*time.localtime((ts - datetime(1970, 1, 1, tzinfo=timezone.utc)).total_seconds())[:6])


def parse_timestamp(timestamp):
    """Parse an ISO 8601 timestamp string (assumed UTC) to an aware datetime."""
    if '.' in timestamp:  # microseconds might not be present
        return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%f').replace(tzinfo=timezone.utc)
    else:
        return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)


def update_excludes(args):
    """Merge exclude patterns from files with those on command line.
    Empty lines and lines starting with '#' are ignored, but whitespace
    is not stripped.
    """
    if hasattr(args, 'exclude_files') and args.exclude_files:
        if not hasattr(args, 'excludes') or args.excludes is None:
            args.excludes = []
        for file in args.exclude_files:
            patterns = [line.rstrip('\r\n') for line in file if not line.startswith('#')]
            args.excludes += [ExcludePattern(pattern) for pattern in patterns if pattern]
            file.close()


def adjust_patterns(paths, excludes):
    """Combine include *paths* and *excludes* into one pattern list; an
    explicit path list implies everything else is excluded.
    """
    if paths:
        return (excludes or []) + [IncludePattern(path) for path in paths] + [ExcludePattern('*')]
    else:
        return excludes


def exclude_path(path, patterns):
    """Used by create and extract sub-commands to determine
    whether or not an item should be processed.
    """
    # First matching pattern wins; an ExcludePattern match means "exclude".
    for pattern in (patterns or []):
        if pattern.match(path):
            return isinstance(pattern, ExcludePattern)
    return False


# For both IncludePattern and ExcludePattern, we require that
# the pattern either match the whole path or an initial segment
# of the path up to but not including a path separator.  To
# unify the two cases, we add a path separator to the end of
# the path before matching.


class IncludePattern:
    """Literal files or directories listed on the command line
    for some operations (e.g. extract, but not create).
    If a directory is specified, all paths that start with that
    path match as well.  A trailing slash makes no difference.
    """
    def __init__(self, pattern):
        self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep

    def match(self, path):
        return (path + os.path.sep).startswith(self.pattern)

    def __repr__(self):
        return '%s(%s)' % (type(self), self.pattern)


class ExcludePattern(IncludePattern):
    """Shell glob patterns to exclude.  A trailing slash means to
    exclude the contents of a directory, but not the directory itself.
    """
    def __init__(self, pattern):
        if pattern.endswith(os.path.sep):
            self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep
        else:
            self.pattern = os.path.normpath(pattern) + os.path.sep + '*'
        # fnmatch and re.match both cache compiled regular expressions.
        # Nevertheless, this is about 10 times faster.
        self.regex = re.compile(translate(self.pattern))

    def match(self, path):
        return self.regex.match(path + os.path.sep) is not None

    def __repr__(self):
        return '%s(%s)' % (type(self), self.pattern)


def timestamp(s):
    """Convert a --timestamp=s argument to a datetime object"""
    try:
        # is it pointing to a file / directory?
        ts = os.stat(s).st_mtime
        return datetime.utcfromtimestamp(ts)
    except OSError:
        # didn't work, try parsing as timestamp. UTC, no TZ, no microsecs support.
        for fmt in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S+00:00',
                    '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S',
                    '%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M',
                    '%Y-%m-%d', '%Y-%j',
                    ):
            try:
                return datetime.strptime(s, fmt)
            except ValueError:
                continue
        raise ValueError('Unable to parse timestamp: %r' % s)


def is_cachedir(path):
    """Determines whether the specified path is a cache directory (and
    therefore should potentially be excluded from the backup) according to
    the CACHEDIR.TAG protocol
    (http://www.brynosaurus.com/cachedir/spec.html).
    """
    tag_contents = b'Signature: 8a477f597d28d172789f06886806bc55'
    tag_path = os.path.join(path, 'CACHEDIR.TAG')
    try:
        if os.path.exists(tag_path):
            with open(tag_path, 'rb') as tag_file:
                tag_data = tag_file.read(len(tag_contents))
                if tag_data == tag_contents:
                    return True
    except OSError:
        # Best-effort check: unreadable tag file means "not a cachedir".
        pass
    return False


def format_time(t):
    """Format datetime suitable for fixed length list output
    """
    # ls-style: recent timestamps show the time, older ones the year.
    if abs((datetime.now() - t).days) < 365:
        return t.strftime('%b %d %H:%M')
    else:
        return t.strftime('%b %d  %Y')


def format_timedelta(td):
    """Format timedelta in a human friendly format
    """
    # Since td.total_seconds() requires python 2.7
    ts = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / float(10 ** 6)
    s = ts % 60
    m = int(ts / 60) % 60
    h = int(ts / 3600) % 24
    txt = '%.2f seconds' % s
    if m:
        txt = '%d minutes %s' % (m, txt)
    if h:
        txt = '%d hours %s' % (h, txt)
    if td.days:
        txt = '%d days %s' % (td.days, txt)
    return txt


def format_file_mode(mod):
    """Format file mode bits for list output
    """
    def x(v):
        # Translate the low three bits of v into an 'rwx'-style triplet.
        return ''.join(v & m and s or '-'
                       for m, s in ((4, 'r'), (2, 'w'), (1, 'x')))
    return '%s%s%s' % (x(mod // 64), x(mod // 8), x(mod))


def format_file_size(v):
    """Format file size into a human friendly format
    """
    # Decimal (SI) units, matching the TB/GB/MB/kB labels.
    if abs(v) > 10**12:
        return '%.2f TB' % (v / 10**12)
    elif abs(v) > 10**9:
        return '%.2f GB' % (v / 10**9)
    elif abs(v) > 10**6:
        return '%.2f MB' % (v / 10**6)
    elif abs(v) > 10**3:
        return '%.2f kB' % (v / 10**3)
    else:
        return '%d B' % v


def format_archive(archive):
    """One-line 'name  local-timestamp' summary of an archive."""
    return '%-36s %s' % (archive.name, to_localtime(archive.ts).strftime('%c'))


class IntegrityError(Error):
    """Data integrity error"""


def memoize(function):
    """Cache *function* results keyed on its positional arguments."""
    cache = {}

    def decorated_function(*args):
        try:
            return cache[args]
        except KeyError:
            val = function(*args)
            cache[args] = val
            return val
    return decorated_function


@memoize
def uid2user(uid, default=None):
    """Map a numeric uid to a user name, or *default* if unknown."""
    try:
        return pwd.getpwuid(uid).pw_name
    except KeyError:
        return default


@memoize
def user2uid(user, default=None):
    """Map a user name to its uid, or *default* if unknown."""
    try:
        return user and pwd.getpwnam(user).pw_uid
    except KeyError:
        return default


@memoize
def gid2group(gid, default=None):
    """Map a numeric gid to a group name, or *default* if unknown."""
    try:
        return grp.getgrgid(gid).gr_name
    except KeyError:
        return default


@memoize
def group2gid(group, default=None):
    """Map a group name to its gid, or *default* if unknown."""
    try:
        return group and grp.getgrnam(group).gr_gid
    except KeyError:
        return default


def posix_acl_use_stored_uid_gid(acl):
    """Replace the user/group field with the stored uid/gid
    """
    entries = []
    for entry in acl.decode('ascii').split('\n'):
        if entry:
            fields = entry.split(':')
            if len(fields) == 4:
                entries.append(':'.join([fields[0], fields[3], fields[2]]))
            else:
                entries.append(entry)
    return ('\n'.join(entries)).encode('ascii')


class Location:
    """Object representing a repository / archive location
    """
    proto = user = host = port = path = archive = None
    # ssh://user@host:port/path::archive
    ssh_re = re.compile(r'(?P<proto>ssh)://(?:(?P<user>[^@]+)@)?'
                        r'(?P<host>[^:/#]+)(?::(?P<port>\d+))?'
                        r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$')
    # file:///path::archive
    file_re = re.compile(r'(?P<proto>file)://'
                         r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$')
    # scp-style user@host:path::archive (host optional -> local file)
    scp_re = re.compile(r'((?:(?P<user>[^@]+)@)?(?P<host>[^:/]+):)?'
                        r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$')

    def __init__(self, text):
        self.orig = text
        if not self.parse(text):
            raise ValueError

    def parse(self, text):
        """Try each syntax in turn; return True when one matched."""
        m = self.ssh_re.match(text)
        if m:
            self.proto = m.group('proto')
            self.user = m.group('user')
            self.host = m.group('host')
            self.port = m.group('port') and int(m.group('port')) or None
            self.path = m.group('path')
            self.archive = m.group('archive')
            return True
        m = self.file_re.match(text)
        if m:
            self.proto = m.group('proto')
            self.path = m.group('path')
            self.archive = m.group('archive')
            return True
        m = self.scp_re.match(text)
        if m:
            self.user = m.group('user')
            self.host = m.group('host')
            self.path = m.group('path')
            self.archive = m.group('archive')
            self.proto = self.host and 'ssh' or 'file'
            return True
        return False

    def __str__(self):
        items = []
        items.append('proto=%r' % self.proto)
        items.append('user=%r' % self.user)
        items.append('host=%r' % self.host)
        items.append('port=%r' % self.port)
        items.append('path=%r' % self.path)
        items.append('archive=%r' % self.archive)
        return ', '.join(items)

    def to_key_filename(self):
        """Derive the key-file name for this location under get_keys_dir()."""
        name = re.sub(r'[^\w]', '_', self.path).strip('_')
        if self.proto != 'file':
            name = self.host + '__' + name
        return os.path.join(get_keys_dir(), name)

    def __repr__(self):
        return "Location(%s)" % self

    def canonical_path(self):
        """Return the location as a canonical local path or ssh:// URL."""
        if self.proto == 'file':
            return self.path
        else:
            if self.path and self.path.startswith('~'):
                path = '/' + self.path
            elif self.path and not self.path.startswith('/'):
                path = '/~/' + self.path
            else:
                path = self.path
            return 'ssh://{}{}{}{}'.format('{}@'.format(self.user) if self.user else '',
                                           self.host,
                                           ':{}'.format(self.port) if self.port else '',
                                           path)


def location_validator(archive=None):
    """Build an argparse type callable validating Location arguments;
    *archive* True/False requires/forbids an ::archive suffix.
    """
    def validator(text):
        try:
            loc = Location(text)
        except ValueError:
            raise argparse.ArgumentTypeError('Invalid location format: "%s"' % text)
        if archive is True and not loc.archive:
            raise argparse.ArgumentTypeError('"%s": No archive specified' % text)
        elif archive is False and loc.archive:
            raise argparse.ArgumentTypeError('"%s" No archive can be specified' % text)
        return loc
    return validator


def read_msgpack(filename):
    """Load one msgpack object from *filename*."""
    with open(filename, 'rb') as fd:
        return msgpack.unpack(fd)


def write_msgpack(filename, d):
    """Atomically write *d* as msgpack to *filename* (tmp file + rename)."""
    with open(filename + '.tmp', 'wb') as fd:
        msgpack.pack(d, fd)
        fd.flush()
        os.fsync(fd.fileno())
    os.rename(filename + '.tmp', filename)


def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
    """Decode the bytes values of *keys* in dict *d* in place; returns *d*."""
    for key in keys:
        if isinstance(d.get(key), bytes):
            d[key] = d[key].decode(encoding, errors)
    return d


def remove_surrogates(s, errors='replace'):
    """Replace surrogates generated by fsdecode with '?'
    """
    return s.encode('utf-8', errors).decode('utf-8')


_safe_re = re.compile(r'^((\.\.)?/+)+')


def make_path_safe(path):
    """Make path safe by making it relative and local
    """
    return _safe_re.sub('', path) or '.'


def daemonize():
    """Detach process from controlling terminal and run in background
    """
    # Classic double-fork, then redirect std fds to /dev/null.
    pid = os.fork()
    if pid:
        os._exit(0)
    os.setsid()
    pid = os.fork()
    if pid:
        os._exit(0)
    os.chdir('/')
    os.close(0)
    os.close(1)
    os.close(2)
    fd = os.open('/dev/null', os.O_RDWR)
    os.dup2(fd, 0)
    os.dup2(fd, 1)
    os.dup2(fd, 2)


class StableDict(dict):
    """A dict subclass with stable items() ordering"""
    def items(self):
        return sorted(super(StableDict, self).items())


# NOTE: the original used `sys.version < '3.3'`, a lexicographic string
# compare that is wrong for two-digit minors ('3.10...' < '3.3' is True);
# sys.version_info compares numerically.
if sys.version_info < (3, 3):
    # st_mtime_ns attribute only available in 3.3+
    def st_mtime_ns(st):
        return int(st.st_mtime * 1e9)

    # unhexlify in < 3.3 incorrectly only accepts bytes input
    def unhexlify(data):
        if isinstance(data, str):
            data = data.encode('ascii')
        return binascii.unhexlify(data)
else:
    def st_mtime_ns(st):
        return st.st_mtime_ns

    unhexlify = binascii.unhexlify


def bigint_to_int(mtime):
    """Convert a little-endian signed bytes value back to int;
    plain ints pass through unchanged.
    """
    if isinstance(mtime, bytes):
        return int.from_bytes(mtime, 'little', signed=True)
    return mtime


def int_to_bigint(value):
    """Convert integers larger than 64 bits to bytearray

    Smaller integers are left alone
    """
    # '+ 9' (not '+ 8') kept as-is: changing it would alter the serialized
    # byte length and thus the on-disk representation.
    if value.bit_length() > 63:
        return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True)
    return value