Преглед на файлове

chunker: release the GIL for long-running C sections and I/O

also: add some benchmarking output showing single-threaded, multithreaded and
multithreaded-with-GIL-releasing-chunker performance.

this changeset may improve multithreading performance slightly, by about 3%
(but that difference may be within the measurement error).
Thomas Waldmann преди 10 години
родител
ревизия
bc2f2fc7d2
променени са 2 файла, в които са добавени 99 реда и са изтрити 33 реда
  1. 15 5
      borg/_chunker.c
  2. 84 28
      docs/misc/multithreading.txt

+ 15 - 5
borg/_chunker.c

@@ -125,10 +125,9 @@ chunker_free(Chunker *c)
 }
 
 static int
-chunker_fill(Chunker *c)
+chunker_fill(Chunker *c, PyThreadState **tstatep)
 {
     size_t n;
-    PyObject *data;
     memmove(c->data, c->data + c->last, c->position + c->remaining - c->last);
     c->position -= c->last;
     c->last = 0;
@@ -161,9 +160,12 @@ chunker_fill(Chunker *c)
         #endif
     }
     else {
+        PyEval_RestoreThread(*tstatep);  // acquire GIL
+        PyObject *data;
         // no os-level file descriptor, use Python file object API
         data = PyObject_CallMethod(c->fd, "read", "i", n);
         if(!data) {
+            *tstatep = PyEval_SaveThread();  // release GIL
             return 0;
         }
         n = PyBytes_Size(data);
@@ -176,6 +178,7 @@ chunker_fill(Chunker *c)
             c->eof = 1;
         }
         Py_DECREF(data);
+        *tstatep = PyEval_SaveThread();  // release GIL
     }
     return 1;
 }
@@ -197,8 +200,9 @@ static PyObject *
 chunker_process(Chunker *c)
 {
     uint32_t sum, chunk_mask = c->chunk_mask, min_size = c->min_size, window_size = c->window_size;
-    int n = 0;
+    int n = 0, rc = 0;
     int old_last;
+    PyThreadState *tstate;
 
     if(c->done) {
         if(c->bytes_read == c->bytes_yielded)
@@ -208,7 +212,10 @@ chunker_process(Chunker *c)
         return NULL;
     }
     if(c->remaining <= window_size) {
-        if(!chunker_fill(c)) {
+        tstate = PyEval_SaveThread();  // release GIL
+        rc = chunker_fill(c, &tstate);
+        PyEval_RestoreThread(tstate);  // acquire GIL
+        if(!rc) {
             return NULL;
         }
     }
@@ -226,6 +233,7 @@ chunker_process(Chunker *c)
             return NULL;
         }
     }
+    tstate = PyEval_SaveThread();  // release GIL
     sum = buzhash(c->data + c->position, window_size, c->table);
     while(c->remaining > c->window_size && ((sum & chunk_mask) || n < min_size)) {
         sum = buzhash_update(sum, c->data[c->position],
@@ -235,7 +243,8 @@ chunker_process(Chunker *c)
         c->remaining--;
         n++;
         if(c->remaining <= window_size) {
-            if(!chunker_fill(c)) {
+            if(!chunker_fill(c, &tstate)) {
+                PyEval_RestoreThread(tstate);  // acquire GIL
                 return NULL;
             }
         }
@@ -248,5 +257,6 @@ chunker_process(Chunker *c)
     c->last = c->position;
     n = c->last - old_last;
     c->bytes_yielded += n;
+    PyEval_RestoreThread(tstate);  // acquire GIL
     return PyBuffer_FromMemory(c->data + old_last, n);
 }

+ 84 - 28
docs/misc/multithreading.txt

@@ -1,40 +1,96 @@
 Multithreading
 ==============
 
-With crypto
------------
+multithreaded with GIL-releasing chunker
+----------------------------------------
 
-master branch (single threaded)
+Duration: 1 minutes 28.78 seconds
 
-    Command being timed: "borg create repo::1 /home/tw/Desktop/"
-    User time (seconds): 13.78
-    System time (seconds): 0.40
-    Percent of CPU this job got: 83%
-    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:16.98
+                       Original size      Compressed size    Deduplicated size
+This archive:                4.01 GB              3.85 GB              3.60 GB
 
-multithreading
+                       Unique chunks         Total chunks
+Chunk index:                    1597                 1724
+------------------------------------------------------------------------------
+    Command being timed: "borg create --compression 6 --chunker-params 18,23,21,4095 --stats /extra/borg/mt::1 /extra/w10.iso"
+    User time (seconds): 286.84
+    System time (seconds): 11.84
+    Percent of CPU this job got: 335%
+    Elapsed (wall clock) time (h:mm:ss or m:ss): 1:29.11
+    Maximum resident set size (kbytes): 132896
+    Average resident set size (kbytes): 0
+    Major (requiring I/O) page faults: 197
+    Minor (reclaiming a frame) page faults: 2617391
+    Voluntary context switches: 57339
+    Involuntary context switches: 99151
+    Swaps: 0
+    File system inputs: 8077456
+    File system outputs: 7043200
+    Socket messages sent: 0
+    Socket messages received: 0
+    Signals delivered: 0
+    Page size (bytes): 4096
+    Exit status: 0
 
-    Command being timed: "borg create repo::1 /home/tw/Desktop/"
-    User time (seconds): 24.08
-    System time (seconds): 1.16
-    Percent of CPU this job got: 249%
-    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:10.11
+multithreaded with GIL-holding chunker
+--------------------------------------
 
-Without crypto
---------------
+Duration: 1 minutes 31.72 seconds
 
-master branch (single threaded)
+                       Original size      Compressed size    Deduplicated size
+This archive:                4.01 GB              3.85 GB              3.60 GB
 
-    Command being timed: "borg create repo::1 /home/tw/Desktop/"
-    User time (seconds): 11.51
-    System time (seconds): 0.40
-    Percent of CPU this job got: 86%
-    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:13.85
+                       Unique chunks         Total chunks
+Chunk index:                    1597                 1724
+------------------------------------------------------------------------------
+    Command being timed: "borg create --compression 6 --chunker-params 18,23,21,4095 --stats /extra/borg/mt::1 /extra/w10.iso"
+    User time (seconds): 283.38
+    System time (seconds): 11.97
+    Percent of CPU this job got: 320%
+    Elapsed (wall clock) time (h:mm:ss or m:ss): 1:32.06
+    Maximum resident set size (kbytes): 123640
+    Average resident set size (kbytes): 0
+    Major (requiring I/O) page faults: 198
+    Minor (reclaiming a frame) page faults: 2586472
+    Voluntary context switches: 69560
+    Involuntary context switches: 69897
+    Swaps: 0
+    File system inputs: 8083136
+    File system outputs: 7051768
+    Socket messages sent: 0
+    Socket messages received: 0
+    Signals delivered: 0
+    Page size (bytes): 4096
+    Exit status: 0
 
-multithreading
+no multithreading (code from master branch)
+-------------------------------------------
+
+Duration: 3 minutes 15.83 seconds
+
+                       Original size      Compressed size    Deduplicated size
+This archive:                4.01 GB              3.85 GB              3.60 GB
+
+                       Unique chunks         Total chunks
+Chunk index:                    1597                 1724
+------------------------------------------------------------------------------
+    Command being timed: "borg create --compression 6 --chunker-params 18,23,21,4095 --stats /extra/borg/mt::1 /extra/w10.iso"
+    User time (seconds): 163.02
+    System time (seconds): 5.00
+    Percent of CPU this job got: 85%
+    Elapsed (wall clock) time (h:mm:ss or m:ss): 3:16.11
+    Maximum resident set size (kbytes): 48984
+    Average resident set size (kbytes): 0
+    Major (requiring I/O) page faults: 136
+    Minor (reclaiming a frame) page faults: 154179
+    Voluntary context switches: 39641
+    Involuntary context switches: 1019
+    Swaps: 0
+    File system inputs: 8073280
+    File system outputs: 7043320
+    Socket messages sent: 0
+    Socket messages received: 0
+    Signals delivered: 0
+    Page size (bytes): 4096
+    Exit status: 0
 
-    Command being timed: "borg create repo::1 /home/tw/Desktop/"
-    User time (seconds): 20.27
-    System time (seconds): 1.13
-    Percent of CPU this job got: 260%
-    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:08.22