| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208 | #include <Python.h>#include <structmember.h>#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))#define ABS(X) ((X) < 0 ? (-(X)) : (X))static unsigned long intchecksum(const unsigned char *data, int len, unsigned long int sum){    unsigned long int s1, s2, i;    s1 = sum & 0xffff;    s2 = sum >> 16;    for(i=0; i < len; i++)    {        s1 += data[i] + 1;        s2 += s1;    }    return ((s2 & 0xffff) << 16) | (s1 & 0xffff);}static unsigned long introll_checksum(unsigned long int sum, unsigned char remove, unsigned char add, int len){    unsigned long int s1, s2;    s1 = sum & 0xffff;    s2 = sum >> 16;    s1 -= remove - add;    s2 -= len * (remove + 1) - s1;    return ((s2 & 0xffff) << 16) | (s1 & 0xffff);}typedef struct {    PyObject_HEAD    int chunk_size, window_size, i, last, eof, done, buf_size, data_len, seed;    PyObject *chunks, *fd;    unsigned long int sum;    unsigned char *data, add, remove;} ChunkifyIter;static PyObject*ChunkifyIter_iter(PyObject *self){    ChunkifyIter *c = (ChunkifyIter *)self;    c->data_len = 0;    c->done = 0;    c->eof = 0;    c->i = 0;    c->sum = 0;    c->last = 0;    Py_INCREF(self);    return self;}static voidChunkifyIter_dealloc(PyObject *self){    ChunkifyIter *c = (ChunkifyIter *)self;    Py_DECREF(c->fd);    free(c->data);    self->ob_type->tp_free(self);}static PyObject*ChunkifyIter_iternext(PyObject *self){    ChunkifyIter *c = (ChunkifyIter *)self;    int initial = c->window_size;    if(c->done)    {        PyErr_SetNone(PyExc_StopIteration);        return NULL;    }    for(;;)    {        if(c->i == c->buf_size)        {            int diff = c->last + 1 - c->window_size;            assert(diff >= 0);            memmove(c->data, c->data + diff, c->buf_size - diff);            c->i -= diff;            c->last -= diff;            c->data_len -= diff;            assert(c->i >= 0);            assert(c->last >= -1);            assert(c->data_len >= 0);        }        if(c->i == c->data_len)        {            PyObject *data = PyObject_CallMethod(c->fd, "read", "i", c->buf_size - c->data_len);            int n = PyString_Size(data);            memcpy(c->data + c->data_len, PyString_AsString(data), n);            c->data_len += n;            Py_DECREF(data);        }        if(c->i == c->data_len)        {            if(c->last < c->i) {                c->done = 1;                return PyString_FromStringAndSize((char *)(c->data + c->last),                                                  c->data_len - c->last);            }            PyErr_SetNone(PyExc_StopIteration);            return NULL;        }        if(initial)        {            int bytes = MIN(initial, c->data_len - c->i);            initial -= bytes;            c->sum = checksum(c->data + c->i, bytes, 0);            c->i += bytes;        }        else        {            c->sum = roll_checksum(c->sum,                                   c->data[c->i - c->window_size],                                   c->data[c->i],                                   c->window_size);            c->i++;        }        if((c->sum % c->chunk_size) == c->seed ||           (c->i == c->buf_size && c->last <= c->window_size))        {            int old_last = c->last;            c->last = c->i;            return PyString_FromStringAndSize((char *)(c->data + old_last),                                              c->last - old_last);        }    }    PyErr_SetNone(PyExc_StopIteration);    return NULL;}static PyTypeObject ChunkifyIterType = {    PyObject_HEAD_INIT(NULL)    0,                         /*ob_size*/    "_chunkifier._ChunkifyIter",       /*tp_name*/    sizeof(ChunkifyIter),       /*tp_basicsize*/    0,                         /*tp_itemsize*/    ChunkifyIter_dealloc,      /*tp_dealloc*/    0,                         /*tp_print*/    0,                         /*tp_getattr*/    0,                         /*tp_setattr*/    0,                         /*tp_compare*/    0,                         /*tp_repr*/    0,                         /*tp_as_number*/    0,                         /*tp_as_sequence*/    0,                         /*tp_as_mapping*/    0,                         /*tp_hash */    0,                         /*tp_call*/    0,                         /*tp_str*/    0,                         /*tp_getattro*/    0,                         /*tp_setattro*/    0,                         /*tp_as_buffer*/    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER,      /* tp_flags: Py_TPFLAGS_HAVE_ITER tells python to         use tp_iter and tp_iternext fields. */    "",           /* tp_doc */    0,  /* tp_traverse */    0,  /* tp_clear */    0,  /* tp_richcompare */    0,  /* tp_weaklistoffset */    ChunkifyIter_iter,  /* tp_iter: __iter__() method */    ChunkifyIter_iternext  /* tp_iternext: next() method */};static PyObject *chunkify(PyObject *self, PyObject *args){    PyObject *fd;    int chunk_size, window_size, seed;    ChunkifyIter *c;    if (!PyArg_ParseTuple(args, "Oiii", &fd, &chunk_size, &window_size, &seed))    {        return NULL;    }    if (!(c = PyObject_New(ChunkifyIter, &ChunkifyIterType)))    {        return NULL;    }    PyObject_Init((PyObject *)c, &ChunkifyIterType);    c->buf_size = 10 * 1024 * 1024;    c->data = malloc(c->buf_size);    c->fd = fd;    c->chunk_size = chunk_size;    c->window_size = window_size;    c->seed = seed % chunk_size;    Py_INCREF(fd);    return (PyObject *)c;}static PyMethodDef ChunkifierMethods[] = {    {"chunkify",  chunkify, METH_VARARGS, ""},    {NULL, NULL, 0, NULL}        /* Sentinel */};PyMODINIT_FUNCinit_speedups(void){  PyObject* m;  ChunkifyIterType.tp_new = PyType_GenericNew;  if (PyType_Ready(&ChunkifyIterType) < 0)  return;  m = Py_InitModule("_speedups", ChunkifierMethods);}
 |