_speedups.c 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. #include <Python.h>
  2. #include <structmember.h>
  /* Smaller of the two arguments.  Each argument may be evaluated more
   * than once, so do not pass expressions with side effects. */
  #define MIN(a, b) ((a) < (b) ? (a) : (b))
  /* Magnitude of the argument.  The argument may be evaluated more than
   * once, so do not pass expressions with side effects. */
  #define ABS(v) ((v) < 0 ? (-(v)) : (v))
  /*
   * Fold `len` bytes of `data` into the running checksum `sum`.
   *
   * The checksum consists of two 16-bit halves: the low half (s1)
   * accumulates (byte + 1) for every byte, and the high half (s2)
   * accumulates the running value of s1.  Pass 0 as `sum` to start a
   * fresh checksum; pass a previous result to continue one.
   *
   * A `len` of zero or less processes no bytes and returns `sum` reduced
   * to its two 16-bit halves.
   */
  static unsigned long int
  checksum(const unsigned char *data, int len, unsigned long int sum)
  {
      unsigned long int s1 = sum & 0xffff;
      unsigned long int s2 = sum >> 16;
      int i;

      /* The index is signed on purpose: comparing an unsigned index with a
       * negative `len` would convert `len` to a huge value and read far
       * beyond the end of `data`. */
      for (i = 0; i < len; i++) {
          s1 += data[i] + 1;
          s2 += s1;
      }
      return ((s2 & 0xffff) << 16) | (s1 & 0xffff);
  }
  /*
   * Slide a checksum window of `len` bytes forward by one byte.
   *
   * `sum` is the checksum of the current window, `remove` is the byte
   * leaving the window at its start and `add` is the byte entering at its
   * end.  Returns the checksum of the shifted window, packed as
   * (s2 << 16) | s1 with both halves reduced to 16 bits.
   *
   * All arithmetic is done in unsigned long so it wraps modulo a power of
   * two instead of overflowing a signed int; since only the low 16 bits
   * of each half are kept, the wrapped result equals the exact one.
   */
  static unsigned long int
  roll_checksum(unsigned long int sum, unsigned char remove, unsigned char add, int len)
  {
      unsigned long int s1 = sum & 0xffff;
      unsigned long int s2 = sum >> 16;

      /* s1 loses (remove + 1) and gains (add + 1); the +1 terms cancel. */
      s1 = s1 - remove + add;
      /* The removed byte was counted `len` times in s2; the new s1 is
       * counted once more. */
      s2 = s2 - (unsigned long int)len * (remove + 1UL) + s1;

      return ((s2 & 0xffff) << 16) | (s1 & 0xffff);
  }
  28. typedef struct {
  29. PyObject_HEAD
  30. int chunk_size, window_size, last, done, buf_size, seed, remaining, position;
  31. PyObject *chunks, *fd;
  32. unsigned char *data;
  33. } ChunkifyIter;
  34. static PyObject*
  35. ChunkifyIter_iter(PyObject *self)
  36. {
  37. ChunkifyIter *c = (ChunkifyIter *)self;
  38. c->remaining = 0;
  39. c->position = 0;
  40. c->done = 0;
  41. c->last = 0;
  42. Py_INCREF(self);
  43. return self;
  44. }
  45. static void
  46. ChunkifyIter_dealloc(PyObject *self)
  47. {
  48. ChunkifyIter *c = (ChunkifyIter *)self;
  49. Py_DECREF(c->fd);
  50. free(c->data);
  51. self->ob_type->tp_free(self);
  52. }
  53. static void
  54. ChunkifyIter_fill(PyObject *self)
  55. {
  56. ChunkifyIter *c = (ChunkifyIter *)self;
  57. memmove(c->data, c->data + c->last, c->position + c->remaining - c->last);
  58. c->position -= c->last;
  59. c->last = 0;
  60. PyObject *data = PyObject_CallMethod(c->fd, "read", "i", c->buf_size - c->position - c->remaining);
  61. int n = PyString_Size(data);
  62. memcpy(c->data + c->position + c->remaining, PyString_AsString(data), n);
  63. c->remaining += n;
  64. Py_DECREF(data);
  65. }
  66. static PyObject*
  67. ChunkifyIter_iternext(PyObject *self)
  68. {
  69. ChunkifyIter *c = (ChunkifyIter *)self;
  70. unsigned long int sum;
  71. if(c->done) {
  72. PyErr_SetNone(PyExc_StopIteration);
  73. return NULL;
  74. }
  75. if(c->remaining <= c->window_size) {
  76. ChunkifyIter_fill(self);
  77. }
  78. if(c->remaining < c->window_size) {
  79. c->done = 1;
  80. if(c->remaining) {
  81. return PyBuffer_FromMemory(c->data + c->position, c->remaining);
  82. }
  83. else {
  84. PyErr_SetNone(PyExc_StopIteration);
  85. return NULL;
  86. }
  87. }
  88. sum = checksum(c->data + c->position, c->window_size, 0);
  89. c->remaining -= c->window_size;
  90. c->position += c->window_size;
  91. while(c->remaining && (sum & 0xffff) != c->seed) {
  92. sum = roll_checksum(sum, c->data[c->position - c->window_size],
  93. c->data[c->position],
  94. c->window_size);
  95. c->position++;
  96. c->remaining--;
  97. if(c->remaining == 0) {
  98. ChunkifyIter_fill(self);
  99. }
  100. }
  101. int old_last = c->last;
  102. c->last = c->position;
  103. return PyBuffer_FromMemory(c->data + old_last, c->last - old_last);
  104. }
  105. static PyTypeObject ChunkifyIterType = {
  106. PyObject_HEAD_INIT(NULL)
  107. 0, /*ob_size*/
  108. "_chunkifier._ChunkifyIter", /*tp_name*/
  109. sizeof(ChunkifyIter), /*tp_basicsize*/
  110. 0, /*tp_itemsize*/
  111. ChunkifyIter_dealloc, /*tp_dealloc*/
  112. 0, /*tp_print*/
  113. 0, /*tp_getattr*/
  114. 0, /*tp_setattr*/
  115. 0, /*tp_compare*/
  116. 0, /*tp_repr*/
  117. 0, /*tp_as_number*/
  118. 0, /*tp_as_sequence*/
  119. 0, /*tp_as_mapping*/
  120. 0, /*tp_hash */
  121. 0, /*tp_call*/
  122. 0, /*tp_str*/
  123. 0, /*tp_getattro*/
  124. 0, /*tp_setattro*/
  125. 0, /*tp_as_buffer*/
  126. Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER,
  127. /* tp_flags: Py_TPFLAGS_HAVE_ITER tells python to
  128. use tp_iter and tp_iternext fields. */
  129. "", /* tp_doc */
  130. 0, /* tp_traverse */
  131. 0, /* tp_clear */
  132. 0, /* tp_richcompare */
  133. 0, /* tp_weaklistoffset */
  134. ChunkifyIter_iter, /* tp_iter: __iter__() method */
  135. ChunkifyIter_iternext /* tp_iternext: next() method */
  136. };
  137. static PyObject *
  138. chunkify(PyObject *self, PyObject *args)
  139. {
  140. PyObject *fd;
  141. int chunk_size, window_size, seed;
  142. ChunkifyIter *c;
  143. if (!PyArg_ParseTuple(args, "Oiii", &fd, &chunk_size, &window_size, &seed))
  144. {
  145. return NULL;
  146. }
  147. if (!(c = PyObject_New(ChunkifyIter, &ChunkifyIterType)))
  148. {
  149. return NULL;
  150. }
  151. PyObject_Init((PyObject *)c, &ChunkifyIterType);
  152. c->buf_size = 10 * 1024 * 1024;
  153. c->data = malloc(c->buf_size);
  154. c->fd = fd;
  155. c->chunk_size = chunk_size;
  156. c->window_size = window_size;
  157. c->seed = seed % chunk_size;
  158. Py_INCREF(fd);
  159. return (PyObject *)c;
  160. }
  161. static PyMethodDef ChunkifierMethods[] = {
  162. {"chunkify", chunkify, METH_VARARGS, ""},
  163. {NULL, NULL, 0, NULL} /* Sentinel */
  164. };
  165. PyMODINIT_FUNC
  166. init_speedups(void)
  167. {
  168. PyObject* m;
  169. ChunkifyIterType.tp_new = PyType_GenericNew;
  170. if (PyType_Ready(&ChunkifyIterType) < 0) return;
  171. m = Py_InitModule("_speedups", ChunkifierMethods);
  172. }