|
@@ -19,15 +19,15 @@ Some properties of buzhash / of this implementation:
|
|
|
the hash function, e.g. in "X <any 31 bytes> X", the last X would cancel out the influence
|
|
|
of the first X on the hash value.
|
|
|
|
|
|
-(2) the hash table is supposed to have (according to the BUZ) exactly a 50% distribution of
|
|
|
- 0/1 bit values per position, but the hard coded table below doesn't fit that property.
|
|
|
+(2) The hash table is supposed to have (according to the BUZ) exactly a 50% distribution of
|
|
|
+ 0/1 bit values per position, but the hard-coded table below doesn't fit that property.
|
|
|
|
|
|
-(3) if you would use a window size divisible by 64, the seed would cancel itself out completely.
|
|
|
- this is why we use a window size of 4095 bytes.
|
|
|
+(3) If you were to use a window size divisible by 64, the seed would cancel itself out completely.
|
|
|
+ This is why we use a window size of 4095 bytes.
|
|
|
|
|
|
Another quirk is that, even with the 4095 byte window, XORing the entire table by a constant
|
|
|
-is equivalent to XORing the hash output with a different constant. but since the seed is stored
|
|
|
-encrypted, i think it still serves its purpose.
|
|
|
+is equivalent to XORing the hash output with a different constant. But since the seed is stored
|
|
|
+encrypted, I think it still serves its purpose.
|
|
|
*/
|
|
|
|
|
|
static uint32_t table_base[] =
|
|
@@ -174,7 +174,7 @@ chunker_fill(Chunker *c)
|
|
|
off_t offset = c->bytes_read;
|
|
|
#endif
|
|
|
|
|
|
- // if we have a os-level file descriptor, use os-level API
|
|
|
+ // If we have an OS-level file descriptor, use an OS-level API
|
|
|
n = read(c->fh, c->data + c->position + c->remaining, n);
|
|
|
if(n > 0) {
|
|
|
c->remaining += n;
|
|
@@ -197,23 +197,23 @@ chunker_fill(Chunker *c)
|
|
|
if (pagemask == 0)
|
|
|
pagemask = getpagesize() - 1;
|
|
|
|
|
|
- // We tell the OS that we do not need the data that we just have read any
|
|
|
+    // We tell the OS that we do not need the data we have just read any
|
|
|
// more (that it maybe has in the cache). This avoids that we spoil the
|
|
|
// complete cache with data that we only read once and (due to cache
|
|
|
- // size limit) kick out data from the cache that might be still useful
|
|
|
+ // size limit) kick out data from the cache that might still be useful
|
|
|
// for the OS or other processes.
|
|
|
- // We rollback the initial offset back to the start of the page,
|
|
|
- // to avoid it not being truncated as a partial page request.
|
|
|
+ // We roll back the initial offset to the start of the page,
|
|
|
+ // to avoid it being truncated as a partial page request.
|
|
|
int overshoot;
|
|
|
if (length > 0) {
|
|
|
// All Linux kernels (at least up to and including 4.6(.0)) have a bug where
|
|
|
- // they truncate last partial page of POSIX_FADV_DONTNEED request, so we need
|
|
|
+ // they truncate the last partial page of a POSIX_FADV_DONTNEED request, so we need
|
|
|
// to page-align it ourselves. We'll need the rest of this page on the next
|
|
|
// read (assuming this was not EOF).
|
|
|
overshoot = (offset + length) & pagemask;
|
|
|
} else {
|
|
|
// For length == 0 we set overshoot 0, so the below
|
|
|
- // length - overshoot is 0, which means till end of file for
|
|
|
+ // length - overshoot is 0, which means to the end of the file for
|
|
|
// fadvise. This will cancel the final page and is not part
|
|
|
// of the above workaround.
|
|
|
overshoot = 0;
|
|
@@ -225,7 +225,7 @@ chunker_fill(Chunker *c)
|
|
|
PyEval_RestoreThread(thread_state);
|
|
|
}
|
|
|
else {
|
|
|
- // no os-level file descriptor, use Python file object API
|
|
|
+ // No OS-level file descriptor, use Python file object API
|
|
|
data = PyObject_CallMethod(c->fd, "read", "i", n);
|
|
|
if(!data) {
|
|
|
return 0;
|
|
@@ -266,7 +266,7 @@ chunker_process(Chunker *c)
|
|
|
return NULL;
|
|
|
}
|
|
|
}
|
|
|
- /* here we either are at eof ... */
|
|
|
+ /* Here we are either at EOF ... */
|
|
|
if(c->eof) {
|
|
|
c->done = 1;
|
|
|
if(c->remaining) {
|