
Merge branch '1.0-maint' into merge-1.0-maint

# Conflicts:
#	docs/development.rst
#	src/borg/archive.py
#	src/borg/archiver.py
#	src/borg/hashindex.pyx
#	src/borg/testsuite/hashindex.py
Thomas Waldmann, 8 years ago
commit 1287d1ae92
10 changed files with 141 additions and 36 deletions
  1. README.rst (+21 -2)
  2. Vagrantfile (+2 -2)
  3. docs/development.rst (+40 -1)
  4. docs/faq.rst (+2 -0)
  5. docs/usage.rst (+1 -0)
  6. src/borg/archive.py (+5 -3)
  7. src/borg/archiver.py (+30 -6)
  8. src/borg/hashindex.pyx (+7 -3)
  9. src/borg/testsuite/archiver.py (+8 -0)
  10. src/borg/testsuite/hashindex.py (+25 -19)

+ 21 - 2
README.rst

@@ -92,7 +92,6 @@ Initialize a new backup repository and create a backup archive::
 Now doing another backup, just to show off the great deduplication:
 
 .. code-block:: none
-   :emphasize-lines: 11
 
     $ borg create -v --stats /path/to/repo::Saturday2 ~/Documents
     -----------------------------------------------------------------------------
@@ -114,6 +113,22 @@ Now doing another backup, just to show off the great deduplication:
 
 For a graphical frontend refer to our complementary project `BorgWeb <https://borgweb.readthedocs.io/>`_.
 
+Checking Release Authenticity and Security Contact
+==================================================
+
+`Releases <https://github.com/borgbackup/borg/releases>`_ are signed with this GPG key,
+please use GPG to verify their authenticity.
+
+In case you discover a security issue, please use this contact for reporting it privately
+and please, if possible, use encrypted E-Mail:
+
+Thomas Waldmann <tw@waldmann-edv.de>
+
+GPG Key Fingerprint: 6D5B EF9A DD20 7580 5747  B70F 9F88 FB52 FAF7 B393
+
+The public key can be fetched from any GPG keyserver, but be careful: you must
+use the **full fingerprint** to check that you got the correct key.
+
 Links
 =====
 
@@ -169,7 +184,7 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS.
 
 Borg is distributed under a 3-clause BSD license, see `License`_ for the complete license.
 
-|doc| |build| |coverage|
+|doc| |build| |coverage| |bestpractices|
 
 .. |doc| image:: https://readthedocs.org/projects/borgbackup/badge/?version=stable
         :alt: Documentation
@@ -186,3 +201,7 @@ Borg is distributed under a 3-clause BSD license, see `License`_ for the complet
 .. |screencast| image:: https://asciinema.org/a/28691.png
         :alt: BorgBackup Installation and Basic Usage
         :target: https://asciinema.org/a/28691?autoplay=1&speed=2
+
+.. |bestpractices| image:: https://bestpractices.coreinfrastructure.org/projects/271/badge
+        :alt: Best Practices Score
+        :target: https://bestpractices.coreinfrastructure.org/projects/271
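
A minimal sketch of the release-verification flow described in the security
section added above, driving GPG from Python; the artifact file names are
hypothetical:

.. code-block:: python

    import subprocess

    # the full fingerprint, with spaces removed -- never fetch by short key id
    FPR = '6D5BEF9ADD2075805747B70F9F88FB52FAF7B393'

    # fetch the signing key from a keyserver by its full fingerprint
    subprocess.run(['gpg', '--recv-keys', FPR], check=True)

    # verify a downloaded release against its detached signature
    subprocess.run(['gpg', '--verify',
                    'borgbackup-x.y.z.tar.gz.asc',
                    'borgbackup-x.y.z.tar.gz'], check=True)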

+ 2 - 2
Vagrantfile

@@ -387,7 +387,7 @@ Vagrant.configure(2) do |config|
   end
 
   config.vm.define "wheezy32" do |b|
-    b.vm.box = "boxcutter/debian79-i386"
+    b.vm.box = "boxcutter/debian711-i386"
     b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy
     b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy32")
@@ -400,7 +400,7 @@ Vagrant.configure(2) do |config|
   end
 
   config.vm.define "wheezy64" do |b|
-    b.vm.box = "boxcutter/debian79"
+    b.vm.box = "boxcutter/debian711"
     b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy
     b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy64")

+ 40 - 1
docs/development.rst

@@ -10,11 +10,50 @@ This chapter will get you started with |project_name| development.
 |project_name| is written in Python (with a little bit of Cython and C for
 the performance critical parts).
 
+Contributions
+-------------
+
+... are welcome!
+
+Some guidance for contributors:
+
+- discuss changes on the github issue tracker, IRC or the mailing list
+
+- choose the branch you base your changesets on wisely:
+
+  - choose x.y-maint for stuff that should go into next x.y release
+    (it usually gets merged into master branch later also)
+  - choose master if that does not apply
+
+- do clean changesets:
+
+  - focus on some topic, resist changing anything else.
+  - do not do style changes mixed with functional changes.
+  - try to avoid refactorings mixed with functional changes.
+  - if you need to fix something after commit/push:
+
+    - if there are ongoing reviews: do a fixup commit you can
+      merge into the bad commit later.
+    - if there are no ongoing reviews or you did not push the
+      bad commit yet: edit the commit to include your fix or
+      merge the fixup commit before pushing.
+  - have a nice, clear, typo-free commit comment
+  - if you fixed an issue, refer to it in your commit comment
+  - follow the style guide (see below)
+
+- if you write new code, please add tests and docs for it
+
+- run the tests, fix anything that comes up
+
+- make a pull request on github
+
+- wait for review by other developers
+
 Code and issues
 ---------------
 
 Code is stored on Github, in the `Borgbackup organization
-<https://github.com/borgbackup/borg/>`_. `Issues
+<https://github.com/borgbackup/borg/>`_. `Issues
 <https://github.com/borgbackup/borg/issues>`_ and `pull requests
 <https://github.com/borgbackup/borg/pulls>`_ should be sent there as
 well. See also the :ref:`support` section for more details.

+ 2 - 0
docs/faq.rst

@@ -352,6 +352,8 @@ those files are reported as being added when, really, chunks are
 already used.
 
 
+.. _always_chunking:
+
 It always chunks all my files, even unchanged ones!
 ---------------------------------------------------
 

+ 1 - 0
docs/usage.rst

@@ -86,6 +86,7 @@ General:
     BORG_FILES_CACHE_TTL
         When set to a numeric value, this determines the maximum "time to live" for the files cache
         entries (default: 20). The files cache is used to quickly determine whether a file is unchanged.
+        The FAQ explains this in more detail: :ref:`always_chunking`
     TMPDIR
         where temporary files are stored (might need a lot of temporary space for some operations)
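
A sketch of how such a TTL is typically read from the environment (the exact
lookup inside borg may differ):

.. code-block:: python

    import os

    # entries not seen for `ttl` consecutive backups get evicted, so those
    # files are chunked again on the next run
    ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', '20'))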
 

+ 5 - 3
src/borg/archive.py

@@ -732,7 +732,8 @@ Number of files: {0.stats.nfiles}'''.format(
             return 'b'  # block device
 
     def process_symlink(self, path, st):
-        source = os.readlink(path)
+        with backup_io():
+            source = os.readlink(path)
         item = Item(path=make_path_safe(path), source=source)
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
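
For context, ``backup_io()`` translates low-level read errors into a
borg-specific exception; a minimal sketch of the idea (the real helper in
borg may differ in detail):

.. code-block:: python

    from contextlib import contextmanager

    class BackupOSError(Exception):
        """Raised instead of OSError while reading the source filesystem,
        so callers can catch these errors narrowly."""

    @contextmanager
    def backup_io():
        try:
            yield
        except OSError as os_error:
            raise BackupOSError(os_error) from os_error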
@@ -1009,8 +1010,9 @@ class ArchiveChecker:
         """Fetch a list of all object keys from repository
         """
         # Explicitly set the initial hash table capacity to avoid performance issues
-        # due to hash table "resonance"
-        capacity = int(len(self.repository) * 1.35 + 1)  # > len * 1.0 / HASH_MAX_LOAD (see _hashindex.c)
+        # due to hash table "resonance".
+        # Since reconstruction of archive items can add some new chunks, add 10 % headroom
+        capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
         self.chunks = ChunkIndex(capacity)
         marker = None
         while True:
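
A quick check of the new capacity arithmetic, assuming HASH_MAX_LOAD
(exposed as ``MAX_LOAD_FACTOR`` in the hashindex changes below) is 0.75:

.. code-block:: python

    num_chunks = 1000000
    max_load_factor = 0.75  # assumed value of HASH_MAX_LOAD in _hashindex.c

    # old formula: hardcoded factor > 1 / HASH_MAX_LOAD
    old_capacity = int(num_chunks * 1.35 + 1)               # 1350001
    # new formula: derived from the real constant, plus 10% headroom
    new_capacity = int(num_chunks / max_load_factor * 1.1)  # 1466666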

+ 30 - 6
src/borg/archiver.py

@@ -379,8 +379,13 @@ class Archiver:
                 if not read_special:
                     status = archive.process_symlink(path, st)
                 else:
-                    st_target = os.stat(path)
-                    if is_special(st_target.st_mode):
+                    try:
+                        st_target = os.stat(path)
+                    except OSError:
+                        special = False
+                    else:
+                        special = is_special(st_target.st_mode)
+                    if special:
                         status = archive.process_file(path, st_target, cache)
                     else:
                         status = archive.process_symlink(path, st)
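
The try/except matters because ``os.stat()`` follows the link: for a broken
symlink it raises OSError, and the link should then be stored as a plain
symlink instead of aborting. The same pattern as a standalone sketch (the
helper name is hypothetical):

.. code-block:: python

    import os
    import stat

    def target_if_special(path):
        """Return the target's stat result if `path` is a symlink to a
        special file (block/char device or FIFO), else None."""
        try:
            st_target = os.stat(path)  # follows the symlink
        except OSError:
            return None  # broken link: archive the symlink itself
        is_special = (stat.S_ISBLK(st_target.st_mode) or
                      stat.S_ISCHR(st_target.st_mode) or
                      stat.S_ISFIFO(st_target.st_mode))
        return st_target if is_special else None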
@@ -1865,11 +1870,14 @@ class Archiver:
         info_epilog = textwrap.dedent("""
         This command displays detailed information about the specified archive or repository.
 
-        The "This archive" line refers exclusively to the given archive:
-        "Deduplicated size" is the size of the unique chunks stored only for the
-        given archive.
+        Please note that the deduplicated sizes of the individual archives do not add
+        up to the deduplicated size of the repository ("all archives"), because the two
+        mean different things:
 
-        The "All archives" line shows global statistics (all chunks).
+        This archive / deduplicated size = amount of data stored ONLY for this archive
+                                         = unique chunks of this archive.
+        All archives / deduplicated size = amount of data stored in the repo
+                                         = all chunks in the repository.
         """)
         subparser = subparsers.add_parser('info', parents=[common_parser], add_help=False,
                                           description=self.do_info.__doc__,
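
A toy example with made-up numbers, showing why the per-archive figures in
the epilog above do not sum to the repository total:

.. code-block:: python

    unique_a = 10  # MB of chunks referenced only by archive A
    unique_b = 5   # MB of chunks referenced only by archive B
    shared = 20    # MB of chunks referenced by both archives

    dedup_a = unique_a                           # "This archive" for A: 10
    dedup_b = unique_b                           # "This archive" for B: 5
    all_archives = unique_a + unique_b + shared  # "All archives": 35

    assert dedup_a + dedup_b != all_archives     # 15 != 35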
@@ -2375,6 +2383,14 @@ def sig_term_handler(signum, stack):
     raise SIGTERMReceived
 
 
+class SIGHUPReceived(BaseException):
+    pass
+
+
+def sig_hup_handler(signum, stack):
+    raise SIGHUPReceived
+
+
 def setup_signal_handlers():  # pragma: no cover
     sigs = []
     if hasattr(signal, 'SIGUSR1'):
@@ -2383,7 +2399,12 @@ def setup_signal_handlers():  # pragma: no cover
         sigs.append(signal.SIGINFO)  # kill -INFO pid (or ctrl-t)
     for sig in sigs:
         signal.signal(sig, sig_info_handler)
+    # If we received SIGTERM or SIGHUP, catch them and raise a proper exception
+    # that can be handled for an orderly exit. SIGHUP is important especially
+    # for systemd systems, where logind sends it when a session exits, in
+    # addition to any traditional use.
     signal.signal(signal.SIGTERM, sig_term_handler)
+    signal.signal(signal.SIGHUP, sig_hup_handler)
 
 
 def main():  # pragma: no cover
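
The pattern used here, as a self-contained sketch (``run_backup`` is a
stand-in for the real long-running work):

.. code-block:: python

    import signal

    class SIGHUPReceived(BaseException):
        # BaseException, so a broad `except Exception` in application
        # code cannot accidentally swallow it
        pass

    def sig_hup_handler(signum, stack):
        raise SIGHUPReceived

    def run_backup():
        signal.pause()  # stand-in: block until a signal arrives

    signal.signal(signal.SIGHUP, sig_hup_handler)

    try:
        run_backup()
    except SIGHUPReceived:
        print('Received SIGHUP.')  # orderly exit instead of a hard kill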
@@ -2438,6 +2459,9 @@ def main():  # pragma: no cover
         tb_log_level = logging.DEBUG
         tb = '%s\n%s' % (traceback.format_exc(), sysinfo())
         exit_code = EXIT_ERROR
+    except SIGHUPReceived:
+        msg = 'Received SIGHUP.'
+        exit_code = EXIT_ERROR
     if msg:
         logger.error(msg)
     if tb:

+ 7 - 3
src/borg/hashindex.pyx

@@ -28,6 +28,8 @@ cdef extern from "_hashindex.c":
     uint32_t _htole32(uint32_t v)
     uint32_t _le32toh(uint32_t v)
 
+    double HASH_MAX_LOAD
+
 
 cdef _NoDefault = object()
 
@@ -50,7 +52,6 @@ assert UINT32_MAX == 2**32-1
 
 # module-level constant because cdef's in classes can't have default values
 cdef uint32_t _MAX_VALUE = 2**32-1025
-MAX_VALUE = _MAX_VALUE
 
 assert _MAX_VALUE % 2 == 1
 
@@ -60,6 +61,9 @@ cdef class IndexBase:
     cdef HashIndex *index
     cdef int key_size
 
+    MAX_LOAD_FACTOR = HASH_MAX_LOAD
+    MAX_VALUE = _MAX_VALUE
+
     def __cinit__(self, capacity=0, path=None, key_size=32):
         self.key_size = key_size
         if path:
@@ -296,7 +300,7 @@ cdef class ChunkIndex(IndexBase):
             unique_chunks += 1
             values = <uint32_t*> (key + self.key_size)
             refcount = _le32toh(values[0])
-            assert refcount <= MAX_VALUE, "invalid reference count"
+            assert refcount <= _MAX_VALUE, "invalid reference count"
             chunks += refcount
             unique_size += _le32toh(values[1])
             unique_csize += _le32toh(values[2])
@@ -358,5 +362,5 @@ cdef class ChunkKeyIterator:
             raise StopIteration
         cdef uint32_t *value = <uint32_t *>(self.key + self.key_size)
         cdef uint32_t refcount = _le32toh(value[0])
-        assert refcount <= MAX_VALUE, "invalid reference count"
+        assert refcount <= _MAX_VALUE, "invalid reference count"
         return (<char *>self.key)[:self.key_size], ChunkIndexEntry(refcount, _le32toh(value[1]), _le32toh(value[2]))

+ 8 - 0
src/borg/testsuite/archiver.py

@@ -1130,6 +1130,14 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         output = self.cmd('create', '--list', '--filter=AM', self.repository_location + '::test3', 'input')
         self.assert_in('file1', output)
 
+    def test_create_read_special_broken_symlink(self):
+        os.symlink('somewhere doesnt exist', os.path.join(self.input_path, 'link'))
+        self.cmd('init', self.repository_location)
+        archive = self.repository_location + '::test'
+        self.cmd('create', '--read-special', archive, 'input')
+        output = self.cmd('list', archive)
+        assert 'input/link -> somewhere doesnt exist' in output
+
     # def test_cmdline_compatibility(self):
     #    self.create_regular_file('file1', size=1024 * 80)
     #    self.cmd('init', self.repository_location)

+ 25 - 19
src/borg/testsuite/hashindex.py

@@ -140,16 +140,16 @@ class HashIndexSizeTestCase(BaseTestCase):
 class HashIndexRefcountingTestCase(BaseTestCase):
     def test_chunkindex_limit(self):
         idx = ChunkIndex()
-        idx[H(1)] = hashindex.MAX_VALUE - 1, 1, 2
+        idx[H(1)] = ChunkIndex.MAX_VALUE - 1, 1, 2
 
         # 5 is arbitrary; any number of incref/decrefs shouldn't move it once it's limited
         for i in range(5):
             # first incref to move it to the limit
             refcount, *_ = idx.incref(H(1))
-            assert refcount == hashindex.MAX_VALUE
+            assert refcount == ChunkIndex.MAX_VALUE
         for i in range(5):
             refcount, *_ = idx.decref(H(1))
-            assert refcount == hashindex.MAX_VALUE
+            assert refcount == ChunkIndex.MAX_VALUE
 
     def _merge(self, refcounta, refcountb):
         def merge(refcount1, refcount2):
@@ -168,23 +168,23 @@ class HashIndexRefcountingTestCase(BaseTestCase):
     def test_chunkindex_merge_limit1(self):
         # Check that it does *not* limit at MAX_VALUE - 1
         # (MAX_VALUE is odd)
-        half = hashindex.MAX_VALUE // 2
-        assert self._merge(half, half) == hashindex.MAX_VALUE - 1
+        half = ChunkIndex.MAX_VALUE // 2
+        assert self._merge(half, half) == ChunkIndex.MAX_VALUE - 1
 
     def test_chunkindex_merge_limit2(self):
         # 3000000000 + 2000000000 > MAX_VALUE
-        assert self._merge(3000000000, 2000000000) == hashindex.MAX_VALUE
+        assert self._merge(3000000000, 2000000000) == ChunkIndex.MAX_VALUE
 
     def test_chunkindex_merge_limit3(self):
         # Crossover point: both addition and limit semantics will yield the same result
-        half = hashindex.MAX_VALUE // 2
-        assert self._merge(half + 1, half) == hashindex.MAX_VALUE
+        half = ChunkIndex.MAX_VALUE // 2
+        assert self._merge(half + 1, half) == ChunkIndex.MAX_VALUE
 
     def test_chunkindex_merge_limit4(self):
         # Beyond crossover, result of addition would be 2**31
-        half = hashindex.MAX_VALUE // 2
-        assert self._merge(half + 2, half) == hashindex.MAX_VALUE
-        assert self._merge(half + 1, half + 1) == hashindex.MAX_VALUE
+        half = ChunkIndex.MAX_VALUE // 2
+        assert self._merge(half + 2, half) == ChunkIndex.MAX_VALUE
+        assert self._merge(half + 1, half + 1) == ChunkIndex.MAX_VALUE
 
     def test_chunkindex_add(self):
         idx1 = ChunkIndex()
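
A minimal model of the saturating addition the merge-limit tests above pin
down (the real arithmetic lives in _hashindex.c):

.. code-block:: python

    MAX_VALUE = 2**32 - 1025  # matches ChunkIndex.MAX_VALUE

    def merge_refcounts(a, b):
        # plain addition, clamped so the refcount sticks at MAX_VALUE
        return min(a + b, MAX_VALUE)

    half = MAX_VALUE // 2
    assert merge_refcounts(half, half) == MAX_VALUE - 1    # not yet limited
    assert merge_refcounts(half + 1, half) == MAX_VALUE    # crossover
    assert merge_refcounts(3000000000, 2000000000) == MAX_VALUE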
@@ -195,17 +195,17 @@ class HashIndexRefcountingTestCase(BaseTestCase):
 
     def test_incref_limit(self):
         idx1 = ChunkIndex()
-        idx1[H(1)] = (hashindex.MAX_VALUE, 6, 7)
+        idx1[H(1)] = (ChunkIndex.MAX_VALUE, 6, 7)
         idx1.incref(H(1))
         refcount, *_ = idx1[H(1)]
-        assert refcount == hashindex.MAX_VALUE
+        assert refcount == ChunkIndex.MAX_VALUE
 
     def test_decref_limit(self):
         idx1 = ChunkIndex()
-        idx1[H(1)] = hashindex.MAX_VALUE, 6, 7
+        idx1[H(1)] = ChunkIndex.MAX_VALUE, 6, 7
         idx1.decref(H(1))
         refcount, *_ = idx1[H(1)]
-        assert refcount == hashindex.MAX_VALUE
+        assert refcount == ChunkIndex.MAX_VALUE
 
     def test_decref_zero(self):
         idx1 = ChunkIndex()
@@ -225,7 +225,7 @@ class HashIndexRefcountingTestCase(BaseTestCase):
     def test_setitem_raises(self):
         idx1 = ChunkIndex()
         with self.assert_raises(AssertionError):
-            idx1[H(1)] = hashindex.MAX_VALUE + 1, 0, 0
+            idx1[H(1)] = ChunkIndex.MAX_VALUE + 1, 0, 0
 
     def test_keyerror(self):
         idx = ChunkIndex()
@@ -282,14 +282,20 @@ class HashIndexDataTestCase(BaseTestCase):
         idx2 = ChunkIndex()
         idx2[H(3)] = 2**32 - 123456, 6, 7
         idx1.merge(idx2)
-        assert idx1[H(3)] == (hashindex.MAX_VALUE, 6, 7)
+        assert idx1[H(3)] == (ChunkIndex.MAX_VALUE, 6, 7)
 
 
 class NSIndexTestCase(BaseTestCase):
     def test_nsindex_segment_limit(self):
         idx = NSIndex()
         with self.assert_raises(AssertionError):
-            idx[H(1)] = hashindex.MAX_VALUE + 1, 0
+            idx[H(1)] = NSIndex.MAX_VALUE + 1, 0
         assert H(1) not in idx
-        idx[H(2)] = hashindex.MAX_VALUE, 0
+        idx[H(2)] = NSIndex.MAX_VALUE, 0
         assert H(2) in idx
+
+
+class AllIndexTestCase(BaseTestCase):
+    def test_max_load_factor(self):
+        assert NSIndex.MAX_LOAD_FACTOR < 1.0
+        assert ChunkIndex.MAX_LOAD_FACTOR < 1.0