_speedups.c 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. #include <Python.h>
  2. #include <structmember.h>
  /*
   * Fold `len` bytes starting at `data` into the running checksum `sum`.
   *
   * The checksum packs two 16-bit accumulators into one value: the low half
   * (s1) is the running sum of (byte + 1) and the high half (s2) is the
   * running sum of the successive s1 values.  Pass 0 as `sum` to start a new
   * checksum, or an earlier result to continue one.
   *
   * A `len` of zero or less consumes no bytes; the result is then just `sum`
   * with both halves masked to 16 bits.
   */
  static unsigned long int
  checksum(const unsigned char *data, int len, unsigned long int sum)
  {
      unsigned long int s1 = sum & 0xffff;
      unsigned long int s2 = sum >> 16;
      int i;

      /* The index is signed on purpose: comparing an unsigned index with a
         negative `len` would convert `len` to a huge value and run far past
         the end of the buffer. */
      for (i = 0; i < len; i++) {
          s1 += data[i] + 1;
          s2 += s1;
      }
      return ((s2 & 0xffff) << 16) | (s1 & 0xffff);
  }
  /*
   * Slide a checksum window forward by one byte.
   *
   * `sum` is a checksum in the packed layout used by checksum() (low 16 bits
   * s1, next 16 bits s2) covering a window of `len` bytes.  `remove` is the
   * byte leaving the window and `add` is the byte entering it.  Returns the
   * checksum of the shifted window in the same layout.
   *
   * All arithmetic is carried out in unsigned long so that it wraps instead
   * of overflowing a signed int when `len` is large; only the low 16 bits of
   * each half survive the final mask, so the wrap-around is harmless.
   */
  static unsigned long int
  roll_checksum(unsigned long int sum, unsigned char remove, unsigned char add, int len)
  {
      unsigned long int s1 = sum & 0xffff;
      unsigned long int s2 = sum >> 16;

      /* s1 loses (remove + 1) and gains (add + 1); the +1 terms cancel. */
      s1 -= (unsigned long int)remove - (unsigned long int)add;
      /* s2 loses the removed byte's contribution, which was counted once per
         window position, and gains the new s1. */
      s2 -= (unsigned long int)len * ((unsigned long int)remove + 1) - s1;
      return ((s2 & 0xffff) << 16) | (s1 & 0xffff);
  }
  26. typedef struct {
  27. PyObject_HEAD
  28. int chunk_size, window_size, i, last, eof, done, buf_size, data_len, initial;
  29. PyObject *chunks, *fd;
  30. unsigned long int sum;
  31. unsigned char *data, add, remove;
  32. } ChunkifyIter;
  33. static PyObject*
  34. ChunkifyIter_iter(PyObject *self)
  35. {
  36. ChunkifyIter *c = (ChunkifyIter *)self;
  37. c->data_len = 0;
  38. c->done = 0;
  39. c->eof = 0;
  40. c->i = 0;
  41. c->sum = 0;
  42. c->last = -1;
  43. c->initial = c->window_size;
  44. Py_INCREF(self);
  45. return self;
  46. }
  47. static void
  48. ChunkifyIter_dealloc(PyObject *self)
  49. {
  50. ChunkifyIter *c = (ChunkifyIter *)self;
  51. Py_DECREF(c->fd);
  52. free(c->data);
  53. self->ob_type->tp_free(self);
  54. }
  55. static PyObject*
  56. ChunkifyIter_iternext(PyObject *self)
  57. {
  58. ChunkifyIter *c = (ChunkifyIter *)self;
  59. if(c->done)
  60. {
  61. PyErr_SetNone(PyExc_StopIteration);
  62. return NULL;
  63. }
  64. for(;;)
  65. {
  66. if(c->i == c->buf_size)
  67. {
  68. int diff = c->last + 1 - c->window_size;
  69. memmove(c->data, c->data + diff, c->buf_size - diff);
  70. c->i -= diff;
  71. c->last -= diff;
  72. c->data_len -= diff;
  73. assert(c->i >= 0);
  74. assert(c->last >= -1);
  75. assert(c->data_len >= 0);
  76. }
  77. if(c->i == c->data_len)
  78. {
  79. PyObject *data = PyObject_CallMethod(c->fd, "read", "i", c->buf_size - c->data_len);
  80. int n = PyString_Size(data);
  81. memcpy(c->data + c->data_len, PyString_AsString(data), n);
  82. c->data_len += n;
  83. Py_DECREF(data);
  84. }
  85. if(c->i == c->data_len)
  86. {
  87. if(c->last < c->i - 1) {
  88. c->done = 1;
  89. return PyString_FromStringAndSize((char *)(c->data + c->last + 1),
  90. c->data_len - c->last - 1);
  91. }
  92. PyErr_SetNone(PyExc_StopIteration);
  93. return NULL;
  94. }
  95. if(c->initial)
  96. {
  97. c->initial--;
  98. c->sum = checksum(c->data + c->i, 1, c->sum);
  99. }
  100. else
  101. {
  102. c->sum = roll_checksum(c->sum,
  103. c->data[c->i - c->window_size],
  104. c->data[c->i],
  105. c->window_size);
  106. }
  107. c->i++;
  108. if(c->i == c->buf_size && c->last == c->window_size - 1)
  109. {
  110. int old_last = c->last;
  111. c->last = c->i - 1;
  112. printf("Max chunk size reached %d\n", c->last - old_last);
  113. return PyString_FromStringAndSize((char *)(c->data + old_last + 1),
  114. c->last - old_last);
  115. }
  116. else if((c->sum % c->chunk_size) == 0)
  117. {
  118. int old_last = c->last;
  119. c->last = c->i - 1;
  120. return PyString_FromStringAndSize((char *)(c->data + old_last + 1),
  121. c->last - old_last);
  122. }
  123. }
  124. PyErr_SetNone(PyExc_StopIteration);
  125. return NULL;
  126. }
  127. static PyTypeObject ChunkifyIterType = {
  128. PyObject_HEAD_INIT(NULL)
  129. 0, /*ob_size*/
  130. "_chunkifier._ChunkifyIter", /*tp_name*/
  131. sizeof(ChunkifyIter), /*tp_basicsize*/
  132. 0, /*tp_itemsize*/
  133. ChunkifyIter_dealloc, /*tp_dealloc*/
  134. 0, /*tp_print*/
  135. 0, /*tp_getattr*/
  136. 0, /*tp_setattr*/
  137. 0, /*tp_compare*/
  138. 0, /*tp_repr*/
  139. 0, /*tp_as_number*/
  140. 0, /*tp_as_sequence*/
  141. 0, /*tp_as_mapping*/
  142. 0, /*tp_hash */
  143. 0, /*tp_call*/
  144. 0, /*tp_str*/
  145. 0, /*tp_getattro*/
  146. 0, /*tp_setattro*/
  147. 0, /*tp_as_buffer*/
  148. Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER,
  149. /* tp_flags: Py_TPFLAGS_HAVE_ITER tells python to
  150. use tp_iter and tp_iternext fields. */
  151. "", /* tp_doc */
  152. 0, /* tp_traverse */
  153. 0, /* tp_clear */
  154. 0, /* tp_richcompare */
  155. 0, /* tp_weaklistoffset */
  156. ChunkifyIter_iter, /* tp_iter: __iter__() method */
  157. ChunkifyIter_iternext /* tp_iternext: next() method */
  158. };
  159. static PyObject *
  160. chunkify(PyObject *self, PyObject *args)
  161. {
  162. PyObject *fd;
  163. long int chunk_size, window_size;
  164. ChunkifyIter *c;
  165. if (!PyArg_ParseTuple(args, "Oii", &fd, &chunk_size, &window_size))
  166. {
  167. return NULL;
  168. }
  169. if (!(c = PyObject_New(ChunkifyIter, &ChunkifyIterType)))
  170. {
  171. return NULL;
  172. }
  173. PyObject_Init((PyObject *)c, &ChunkifyIterType);
  174. c->buf_size = 10 * 1024 * 1024;
  175. c->data = malloc(c->buf_size);
  176. c->fd = fd;
  177. c->chunk_size = chunk_size;
  178. c->window_size = window_size;
  179. Py_INCREF(fd);
  180. return (PyObject *)c;
  181. }
  182. static PyObject *
  183. py_checksum(PyObject *self, PyObject *args)
  184. {
  185. PyObject *data;
  186. unsigned long int sum = 0;
  187. if(!PyArg_ParseTuple(args, "O|k", &data, &sum)) return NULL;
  188. if(!PyString_Check(data))
  189. {
  190. PyErr_SetNone(PyExc_TypeError);
  191. return NULL;
  192. }
  193. return PyInt_FromLong(checksum((unsigned char *)PyString_AsString(data),
  194. PyString_Size(data), sum));
  195. }
  196. static PyObject *
  197. py_roll_checksum(PyObject *self, PyObject *args)
  198. {
  199. unsigned long int sum = 0, len, a, r;
  200. PyObject *add, *remove;
  201. if (!PyArg_ParseTuple(args, "kOOk", &sum, &remove, &add, &len)) return NULL;
  202. if(!PyString_Check(remove) || !PyString_Check(add) ||
  203. PyString_Size(remove) != 1 || PyString_Size(add) != 1)
  204. {
  205. PyErr_SetNone(PyExc_TypeError);
  206. return NULL;
  207. }
  208. a = *((const unsigned char *)PyString_AsString(add));
  209. r = *((const unsigned char *)PyString_AsString(remove));
  210. return PyInt_FromLong(roll_checksum(sum, r, a, len));
  211. }
  212. static PyMethodDef ChunkifierMethods[] = {
  213. {"chunkify", chunkify, METH_VARARGS, ""},
  214. {"checksum", py_checksum, METH_VARARGS, ""},
  215. {"roll_checksum", py_roll_checksum, METH_VARARGS, ""},
  216. {NULL, NULL, 0, NULL} /* Sentinel */
  217. };
  218. PyMODINIT_FUNC
  219. init_speedups(void)
  220. {
  221. PyObject* m;
  222. ChunkifyIterType.tp_new = PyType_GenericNew;
  223. if (PyType_Ready(&ChunkifyIterType) < 0) return;
  224. m = Py_InitModule("_speedups", ChunkifierMethods);
  225. Py_INCREF(&ChunkifyIterType);
  226. PyModule_AddObject(m, "_ChunkifyIter", (PyObject *)&ChunkifyIterType);
  227. }