Преглед изворни кода

Merge pull request #1053 from ThomasWaldmann/wcswidth

add swidth call, fixes #1051
enkore пре 9 година
родитељ
комит
13a4e40024

+ 55 - 2
borg/archive.py

@@ -9,6 +9,7 @@ from .key import key_factory
 from .remote import cache_if_remote
 
 import os
+from shutil import get_terminal_size
 import socket
 import stat
 import sys
@@ -19,13 +20,13 @@ from .compress import COMPR_BUFFER
 from .constants import *  # NOQA
 from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \
     parse_timestamp, to_localtime, format_time, format_timedelta, safe_encode, safe_decode, \
-    Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
+    Manifest, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
     ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \
     PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \
     CompressionDecider1, CompressionDecider2, CompressionSpec, \
     IntegrityError
 from .repository import Repository
-from .platform import acl_get, acl_set, set_flags, get_flags
+from .platform import acl_get, acl_set, set_flags, get_flags, swidth
 from .chunker import Chunker
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .cache import ChunkListEntry
@@ -37,6 +38,58 @@ flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0)
 flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0)
 
 
+class Statistics:
+
+    def __init__(self):
+        self.osize = self.csize = self.usize = self.nfiles = 0
+        self.last_progress = 0  # timestamp when last progress was shown
+
+    def update(self, size, csize, unique):
+        self.osize += size
+        self.csize += csize
+        if unique:
+            self.usize += csize
+
+    summary = """\
+                       Original size      Compressed size    Deduplicated size
+{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}"""
+
+    def __str__(self):
+        return self.summary.format(stats=self, label='This archive:')
+
+    def __repr__(self):
+        return "<{cls} object at {hash:#x} ({self.osize}, {self.csize}, {self.usize})>".format(
+            cls=type(self).__name__, hash=id(self), self=self)
+
+    @property
+    def osize_fmt(self):
+        return format_file_size(self.osize)
+
+    @property
+    def usize_fmt(self):
+        return format_file_size(self.usize)
+
+    @property
+    def csize_fmt(self):
+        return format_file_size(self.csize)
+
+    def show_progress(self, item=None, final=False, stream=None, dt=None):
+        now = time.time()
+        if dt is None or now - self.last_progress > dt:
+            self.last_progress = now
+            columns, lines = get_terminal_size()
+            if not final:
+                msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self)
+                path = remove_surrogates(item[b'path']) if item else ''
+                space = columns - swidth(msg)
+                if space < swidth('...') + swidth(path):
+                    path = '%s...%s' % (path[:(space // 2) - swidth('...')], path[-space // 2:])
+                msg += "{0:<{space}}".format(path, space=space)
+            else:
+                msg = ' ' * columns
+            print(msg, file=stream or sys.stderr, end="\r", flush=True)
+
+
 class DownloadPipeline:
 
     def __init__(self, repository, key):

+ 2 - 2
borg/archiver.py

@@ -22,7 +22,7 @@ from . import __version__
 from .helpers import Error, location_validator, archivename_validator, format_time, format_file_size, \
     parse_pattern, PathPrefixPattern, to_localtime, timestamp, \
     get_cache_dir, prune_within, prune_split, bin_to_hex, safe_encode, \
-    Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
+    Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, \
     dir_is_tagged, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
     log_multi, PatternMatcher, ItemFormatter
 from .logger import create_logger, setup_logging
@@ -34,7 +34,7 @@ from .repository import Repository
 from .cache import Cache
 from .constants import *  # NOQA
 from .key import key_creator, RepoKey, PassphraseKey
-from .archive import Archive, ArchiveChecker, ArchiveRecreater
+from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
 from .selftest import selftest
 from .hashindex import ChunkIndexEntry

+ 0 - 52
borg/helpers.py

@@ -11,7 +11,6 @@ import stat
 import textwrap
 import pwd
 import re
-from shutil import get_terminal_size
 import sys
 from string import Formatter
 import platform
@@ -172,57 +171,6 @@ def prune_split(archives, pattern, n, skip=[]):
     return keep
 
 
-class Statistics:
-
-    def __init__(self):
-        self.osize = self.csize = self.usize = self.nfiles = 0
-        self.last_progress = 0  # timestamp when last progress was shown
-
-    def update(self, size, csize, unique):
-        self.osize += size
-        self.csize += csize
-        if unique:
-            self.usize += csize
-
-    summary = """\
-                       Original size      Compressed size    Deduplicated size
-{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}"""
-
-    def __str__(self):
-        return self.summary.format(stats=self, label='This archive:')
-
-    def __repr__(self):
-        return "<{cls} object at {hash:#x} ({self.osize}, {self.csize}, {self.usize})>".format(cls=type(self).__name__, hash=id(self), self=self)
-
-    @property
-    def osize_fmt(self):
-        return format_file_size(self.osize)
-
-    @property
-    def usize_fmt(self):
-        return format_file_size(self.usize)
-
-    @property
-    def csize_fmt(self):
-        return format_file_size(self.csize)
-
-    def show_progress(self, item=None, final=False, stream=None, dt=None):
-        now = time.time()
-        if dt is None or now - self.last_progress > dt:
-            self.last_progress = now
-            columns, lines = get_terminal_size()
-            if not final:
-                msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self)
-                path = remove_surrogates(item[b'path']) if item else ''
-                space = columns - len(msg)
-                if space < len('...') + len(path):
-                    path = '%s...%s' % (path[:(space // 2) - len('...')], path[-space // 2:])
-                msg += "{0:<{space}}".format(path, space=space)
-            else:
-                msg = ' ' * columns
-            print(msg, file=stream or sys.stderr, end="\r", flush=True)
-
-
 def get_home_dir():
     """Get user's home directory while preferring a possibly set HOME
     environment variable

+ 4 - 4
borg/platform.py

@@ -1,10 +1,10 @@
 import sys
 
-from .platform_base import acl_get, acl_set, SyncFile, sync_dir, set_flags, get_flags, API_VERSION
+from .platform_base import acl_get, acl_set, SyncFile, sync_dir, set_flags, get_flags, swidth, API_VERSION
 
 if sys.platform.startswith('linux'):  # pragma: linux only
-    from .platform_linux import acl_get, acl_set, SyncFile, set_flags, get_flags, API_VERSION
+    from .platform_linux import acl_get, acl_set, SyncFile, set_flags, get_flags, swidth, API_VERSION
 elif sys.platform.startswith('freebsd'):  # pragma: freebsd only
-    from .platform_freebsd import acl_get, acl_set, API_VERSION
+    from .platform_freebsd import acl_get, acl_set, swidth, API_VERSION
 elif sys.platform == 'darwin':  # pragma: darwin only
-    from .platform_darwin import acl_get, acl_set, API_VERSION
+    from .platform_darwin import acl_get, acl_set, swidth, API_VERSION

+ 8 - 0
borg/platform_base.py

@@ -90,3 +90,11 @@ class SyncFile:
         self.sync()
         self.fd.close()
         sync_dir(os.path.dirname(self.fd.name))
+
+
+def swidth(s):
+    """terminal output width of string <s>
+
+    For western scripts, this is just len(s), but for cjk glyphs, 2 cells are used.
+    """
+    return len(s)

+ 1 - 0
borg/platform_darwin.pyx

@@ -1,5 +1,6 @@
 import os
 from .helpers import user2uid, group2gid, safe_decode, safe_encode
+from .platform_posix import swidth
 
 API_VERSION = 3
 

+ 1 - 0
borg/platform_freebsd.pyx

@@ -1,5 +1,6 @@
 import os
 from .helpers import posix_acl_use_stored_uid_gid, safe_encode, safe_decode
+from .platform_posix import swidth
 
 API_VERSION = 3
 

+ 2 - 0
borg/platform_linux.pyx

@@ -5,6 +5,8 @@ import stat
 
 from .helpers import posix_acl_use_stored_uid_gid, user2uid, group2gid, safe_decode, safe_encode
 from .platform_base import SyncFile as BaseSyncFile
+from .platform_posix import swidth
+
 from libc cimport errno
 
 API_VERSION = 3

+ 5 - 0
borg/platform_posix.pyx

@@ -0,0 +1,5 @@
+cdef extern from "wchar.h":
+    cdef int wcswidth(const Py_UNICODE *str, size_t n)
+ 
+def swidth(s):
+    """terminal output width of string <s>, as computed by the C library's wcswidth()
+
+    If wcswidth() can not compute a width (it returns -1 when <s> contains a
+    non-printable wide character), len(s) is returned instead, so callers
+    never have to deal with a negative width.
+    """
+    str_len = len(s)
+    terminal_width = wcswidth(s, str_len)
+    if terminal_width >= 0:
+        return terminal_width
+    return str_len

+ 51 - 1
borg/testsuite/archive.py

@@ -1,14 +1,64 @@
+import os
 from datetime import datetime, timezone
+from io import StringIO
 from unittest.mock import Mock
 
+import pytest
 import msgpack
 
-from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
+from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, Statistics
 from ..key import PlaintextKey
 from ..helpers import Manifest
 from . import BaseTestCase
 
 
+@pytest.fixture()
+def stats():
+    stats = Statistics()
+    stats.update(20, 10, unique=True)
+    return stats
+
+
+def test_stats_basic(stats):
+    assert stats.osize == 20
+    assert stats.csize == stats.usize == 10
+    stats.update(20, 10, unique=False)
+    assert stats.osize == 40
+    assert stats.csize == 20
+    assert stats.usize == 10
+
+
+def tests_stats_progress(stats, columns=80):
+    os.environ['COLUMNS'] = str(columns)
+    out = StringIO()
+    stats.show_progress(stream=out)
+    s = '20 B O 10 B C 10 B D 0 N '
+    buf = ' ' * (columns - len(s))
+    assert out.getvalue() == s + buf + "\r"
+
+    out = StringIO()
+    stats.update(10**3, 0, unique=False)
+    stats.show_progress(item={b'path': 'foo'}, final=False, stream=out)
+    s = '1.02 kB O 10 B C 10 B D 0 N foo'
+    buf = ' ' * (columns - len(s))
+    assert out.getvalue() == s + buf + "\r"
+    out = StringIO()
+    stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out)
+    s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo'
+    buf = ' ' * (columns - len(s))
+    assert out.getvalue() == s + buf + "\r"
+
+
+def test_stats_format(stats):
+    assert str(stats) == """\
+                       Original size      Compressed size    Deduplicated size
+This archive:                   20 B                 10 B                 10 B"""
+    s = "{0.osize_fmt}".format(stats)
+    assert s == "20 B"
+    # kind of redundant, but id is variable so we can't match reliably
+    assert repr(stats) == '<Statistics object at {:#x} (20, 10, 10)>'.format(id(stats))
+
+
 class MockCache:
 
     def __init__(self):

+ 1 - 49
borg/testsuite/helpers.py

@@ -1,7 +1,6 @@
 import hashlib
 from time import mktime, strptime
 from datetime import datetime, timezone, timedelta
-from io import StringIO
 import os
 
 import pytest
@@ -11,7 +10,7 @@ import msgpack.fallback
 import time
 
 from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, clean_lines, \
-    prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, is_slow_msgpack, \
+    prune_within, prune_split, get_cache_dir, get_keys_dir, is_slow_msgpack, \
     yes, TRUISH, FALSISH, DEFAULTISH, \
     StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, ChunkerParams, Chunk, \
     ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
@@ -629,53 +628,6 @@ def test_get_keys_dir():
         os.environ['BORG_KEYS_DIR'] = old_env
 
 
-@pytest.fixture()
-def stats():
-    stats = Statistics()
-    stats.update(20, 10, unique=True)
-    return stats
-
-
-def test_stats_basic(stats):
-    assert stats.osize == 20
-    assert stats.csize == stats.usize == 10
-    stats.update(20, 10, unique=False)
-    assert stats.osize == 40
-    assert stats.csize == 20
-    assert stats.usize == 10
-
-
-def tests_stats_progress(stats, columns=80):
-    os.environ['COLUMNS'] = str(columns)
-    out = StringIO()
-    stats.show_progress(stream=out)
-    s = '20 B O 10 B C 10 B D 0 N '
-    buf = ' ' * (columns - len(s))
-    assert out.getvalue() == s + buf + "\r"
-
-    out = StringIO()
-    stats.update(10**3, 0, unique=False)
-    stats.show_progress(item={b'path': 'foo'}, final=False, stream=out)
-    s = '1.02 kB O 10 B C 10 B D 0 N foo'
-    buf = ' ' * (columns - len(s))
-    assert out.getvalue() == s + buf + "\r"
-    out = StringIO()
-    stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out)
-    s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo'
-    buf = ' ' * (columns - len(s))
-    assert out.getvalue() == s + buf + "\r"
-
-
-def test_stats_format(stats):
-    assert str(stats) == """\
-                       Original size      Compressed size    Deduplicated size
-This archive:                   20 B                 10 B                 10 B"""
-    s = "{0.osize_fmt}".format(stats)
-    assert s == "20 B"
-    # kind of redundant, but id is variable so we can't match reliably
-    assert repr(stats) == '<Statistics object at {:#x} (20, 10, 10)>'.format(id(stats))
-
-
 def test_file_size():
     """test the size formatting routines"""
     si_size_map = {

+ 14 - 1
borg/testsuite/platform.py

@@ -4,7 +4,7 @@ import sys
 import tempfile
 import unittest
 
-from ..platform import acl_get, acl_set
+from ..platform import acl_get, acl_set, swidth
 from . import BaseTestCase
 
 
@@ -138,3 +138,16 @@ class PlatformDarwinTestCase(BaseTestCase):
         self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True)
         self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended'])
         self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended'])
+
+
+@unittest.skipUnless(sys.platform.startswith(('linux', 'freebsd', 'darwin')), 'POSIX only tests')
+class PlatformPosixTestCase(BaseTestCase):
+
+    def test_swidth_ascii(self):
+        self.assert_equal(swidth("borg"), 4)
+
+    def test_swidth_cjk(self):
+        self.assert_equal(swidth("バックアップ"), 6 * 2)
+
+    def test_swidth_mixed(self):
+        self.assert_equal(swidth("borgバックアップ"), 4 + 6 * 2)

+ 7 - 1
setup.py

@@ -40,6 +40,7 @@ compress_source = 'borg/compress.pyx'
 crypto_source = 'borg/crypto.pyx'
 chunker_source = 'borg/chunker.pyx'
 hashindex_source = 'borg/hashindex.pyx'
+platform_posix_source = 'borg/platform_posix.pyx'
 platform_linux_source = 'borg/platform_linux.pyx'
 platform_darwin_source = 'borg/platform_darwin.pyx'
 platform_freebsd_source = 'borg/platform_freebsd.pyx'
@@ -60,6 +61,7 @@ try:
                 'borg/crypto.c',
                 'borg/chunker.c', 'borg/_chunker.c',
                 'borg/hashindex.c', 'borg/_hashindex.c',
+                'borg/platform_posix.c',
                 'borg/platform_linux.c',
                 'borg/platform_freebsd.c',
                 'borg/platform_darwin.c',
@@ -75,13 +77,14 @@ except ImportError:
     crypto_source = crypto_source.replace('.pyx', '.c')
     chunker_source = chunker_source.replace('.pyx', '.c')
     hashindex_source = hashindex_source.replace('.pyx', '.c')
+    platform_posix_source = platform_posix_source.replace('.pyx', '.c')
     platform_linux_source = platform_linux_source.replace('.pyx', '.c')
     platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
     platform_darwin_source = platform_darwin_source.replace('.pyx', '.c')
     from distutils.command.build_ext import build_ext
     if not on_rtd and not all(os.path.exists(path) for path in [
         compress_source, crypto_source, chunker_source, hashindex_source,
-        platform_linux_source, platform_freebsd_source]):
+        platform_posix_source, platform_linux_source, platform_freebsd_source]):
         raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version.')
 
 
@@ -286,6 +289,9 @@ if not on_rtd:
     Extension('borg.chunker', [chunker_source]),
     Extension('borg.hashindex', [hashindex_source])
 ]
+    if sys.platform.startswith(('linux', 'freebsd', 'darwin')):
+        ext_modules.append(Extension('borg.platform_posix', [platform_posix_source]))
+
     if sys.platform == 'linux':
         ext_modules.append(Extension('borg.platform_linux', [platform_linux_source], libraries=['acl']))
     elif sys.platform.startswith('freebsd'):