|  | @@ -1,4 +1,5 @@
 | 
											
												
													
														|  |  #include <Python.h>
 |  |  #include <Python.h>
 | 
											
												
													
														|  | 
 |  | +#include <fcntl.h>
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  /* Cyclic polynomial / buzhash: https://en.wikipedia.org/wiki/Rolling_hash */
 |  |  /* Cyclic polynomial / buzhash: https://en.wikipedia.org/wiki/Rolling_hash */
 | 
											
												
													
														|  |  
 |  |  
 | 
											
										
											
												
													
														|  | @@ -78,8 +79,9 @@ typedef struct {
 | 
											
												
													
														|  |      int window_size, chunk_mask, min_size;
 |  |      int window_size, chunk_mask, min_size;
 | 
											
												
													
														|  |      size_t buf_size;
 |  |      size_t buf_size;
 | 
											
												
													
														|  |      uint32_t *table;
 |  |      uint32_t *table;
 | 
											
												
													
														|  | -    uint8_t *data;
 |  | 
 | 
											
												
													
														|  | 
 |  | +    uint8_t *data, *read_buf;
 | 
											
												
													
														|  |      PyObject *fd;
 |  |      PyObject *fd;
 | 
											
												
													
														|  | 
 |  | +    int fh;
 | 
											
												
													
														|  |      int done, eof;
 |  |      int done, eof;
 | 
											
												
													
														|  |      size_t remaining, bytes_read, bytes_yielded, position, last;
 |  |      size_t remaining, bytes_read, bytes_yielded, position, last;
 | 
											
												
													
														|  |  } Chunker;
 |  |  } Chunker;
 | 
											
										
											
												
													
														|  | @@ -94,15 +96,17 @@ chunker_init(int window_size, int chunk_mask, int min_size, uint32_t seed)
 | 
											
												
													
														|  |      c->table = buzhash_init_table(seed);
 |  |      c->table = buzhash_init_table(seed);
 | 
											
												
													
														|  |      c->buf_size = 10 * 1024 * 1024;
 |  |      c->buf_size = 10 * 1024 * 1024;
 | 
											
												
													
														|  |      c->data = malloc(c->buf_size);
 |  |      c->data = malloc(c->buf_size);
 | 
											
												
													
														|  | 
 |  | +    c->read_buf = malloc(c->buf_size);
 | 
											
												
													
														|  |      return c;
 |  |      return c;
 | 
											
												
													
														|  |  }
 |  |  }
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  static void
 |  |  static void
 | 
											
												
													
														|  | -chunker_set_fd(Chunker *c, PyObject *fd)
 |  | 
 | 
											
												
													
														|  | 
 |  | +chunker_set_fd(Chunker *c, PyObject *fd, int fh)
 | 
											
												
													
														|  |  {
 |  |  {
 | 
											
												
													
														|  |      Py_XDECREF(c->fd);
 |  |      Py_XDECREF(c->fd);
 | 
											
												
													
														|  |      c->fd = fd;
 |  |      c->fd = fd;
 | 
											
												
													
														|  |      Py_INCREF(fd);
 |  |      Py_INCREF(fd);
 | 
											
												
													
														|  | 
 |  | +    c->fh = fh;
 | 
											
												
													
														|  |      c->done = 0;
 |  |      c->done = 0;
 | 
											
												
													
														|  |      c->remaining = 0;
 |  |      c->remaining = 0;
 | 
											
												
													
														|  |      c->bytes_read = 0;
 |  |      c->bytes_read = 0;
 | 
											
										
											
												
													
														|  | @@ -118,6 +122,7 @@ chunker_free(Chunker *c)
 | 
											
												
													
														|  |      Py_XDECREF(c->fd);
 |  |      Py_XDECREF(c->fd);
 | 
											
												
													
														|  |      free(c->table);
 |  |      free(c->table);
 | 
											
												
													
														|  |      free(c->data);
 |  |      free(c->data);
 | 
											
												
													
														|  | 
 |  | +    free(c->read_buf);
 | 
											
												
													
														|  |      free(c);
 |  |      free(c);
 | 
											
												
													
														|  |  }
 |  |  }
 | 
											
												
													
														|  |  
 |  |  
 | 
											
										
											
												
													
														|  | @@ -133,20 +138,48 @@ chunker_fill(Chunker *c)
 | 
											
												
													
														|  |      if(c->eof || n == 0) {
 |  |      if(c->eof || n == 0) {
 | 
											
												
													
														|  |          return 1;
 |  |          return 1;
 | 
											
												
													
														|  |      }
 |  |      }
 | 
											
												
													
														|  | -    data = PyObject_CallMethod(c->fd, "read", "i", n);
 |  | 
 | 
											
												
													
														|  | -    if(!data) {
 |  | 
 | 
											
												
													
														|  | -        return 0;
 |  | 
 | 
											
												
													
														|  | -    }
 |  | 
 | 
											
												
													
														|  | -    n = PyBytes_Size(data);
 |  | 
 | 
											
												
													
														|  | -    if(n) {
 |  | 
 | 
											
												
													
														|  | -        memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n);
 |  | 
 | 
											
												
													
														|  | -        c->remaining += n;
 |  | 
 | 
											
												
													
														|  | -        c->bytes_read += n;
 |  | 
 | 
											
												
													
														|  | 
 |  | +    if(c->fh >= 0) {
 | 
											
												
													
														|  | 
 |  | +        // if we have an OS-level file descriptor, use the OS-level API
 | 
											
												
													
														|  | 
 |  | +        n = read(c->fh, c->read_buf, n);
 | 
											
												
													
														|  | 
 |  | +        if(n > 0) {
 | 
											
												
													
														|  | 
 |  | +            memcpy(c->data + c->position + c->remaining, c->read_buf, n);
 | 
											
												
													
														|  | 
 |  | +            c->remaining += n;
 | 
											
												
													
														|  | 
 |  | +            c->bytes_read += n;
 | 
											
												
													
														|  | 
 |  | +        }
 | 
											
												
													
														|  | 
 |  | +        else
 | 
											
												
													
														|  | 
 |  | +        if(n == 0) {
 | 
											
												
													
														|  | 
 |  | +            c->eof = 1;
 | 
											
												
													
														|  | 
 |  | +        }
 | 
											
												
													
														|  | 
 |  | +        else {
 | 
											
												
													
														|  | 
 |  | +            // some error happened
 | 
											
												
													
														|  | 
 |  | +            return 0;
 | 
											
												
													
														|  | 
 |  | +        }
 | 
											
												
													
														|  | 
 |  | +        #if ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L )
 | 
											
												
													
														|  | 
 |  | +        // We tell the OS that we do not need the data of this file any more
 | 
											
												
													
														|  | 
 |  | +        // that it may have in the cache. This avoids that we spoil the
 | 
											
												
													
														|  | 
 |  | +        // complete cache with data that we only read once and (due to cache
 | 
											
												
													
														|  | 
 |  | +        // size limit) kick out data from the cache that might still be useful
 | 
											
												
													
														|  | 
 |  | +        // for the OS or other processes.
 | 
											
												
													
														|  | 
 |  | +        posix_fadvise(c->fh, (off_t) 0, (off_t) 0, POSIX_FADV_DONTNEED);
 | 
											
												
													
														|  | 
 |  | +        #endif
 | 
											
												
													
														|  |      }
 |  |      }
 | 
											
												
													
														|  |      else {
 |  |      else {
 | 
											
												
													
														|  | -        c->eof = 1;
 |  | 
 | 
											
												
													
														|  | 
 |  | +        // no os-level file descriptor, use Python file object API
 | 
											
												
													
														|  | 
 |  | +        data = PyObject_CallMethod(c->fd, "read", "i", n);
 | 
											
												
													
														|  | 
 |  | +        if(!data) {
 | 
											
												
													
														|  | 
 |  | +            return 0;
 | 
											
												
													
														|  | 
 |  | +        }
 | 
											
												
													
														|  | 
 |  | +        n = PyBytes_Size(data);
 | 
											
												
													
														|  | 
 |  | +        if(n) {
 | 
											
												
													
														|  | 
 |  | +            memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n);
 | 
											
												
													
														|  | 
 |  | +            c->remaining += n;
 | 
											
												
													
														|  | 
 |  | +            c->bytes_read += n;
 | 
											
												
													
														|  | 
 |  | +        }
 | 
											
												
													
														|  | 
 |  | +        else {
 | 
											
												
													
														|  | 
 |  | +            c->eof = 1;
 | 
											
												
													
														|  | 
 |  | +        }
 | 
											
												
													
														|  | 
 |  | +        Py_DECREF(data);
 | 
											
												
													
														|  |      }
 |  |      }
 | 
											
												
													
														|  | -    Py_DECREF(data);
 |  | 
 | 
											
												
													
														|  |      return 1;
 |  |      return 1;
 | 
											
												
													
														|  |  }
 |  |  }
 | 
											
												
													
														|  |  
 |  |  
 |