_speedups.c 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. #include <Python.h>
  2. #include <structmember.h>
  3. #define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
  4. #define ABS(X) ((X) < 0 ? (-(X)) : (X))
  5. static unsigned long int
  6. checksum(const unsigned char *data, int len, unsigned long int sum)
  7. {
  8. unsigned long int s1, s2, i;
  9. s1 = sum & 0xffff;
  10. s2 = sum >> 16;
  11. for(i=0; i < len; i++)
  12. {
  13. s1 += data[i] + 1;
  14. s2 += s1;
  15. }
  16. return ((s2 & 0xffff) << 16) | (s1 & 0xffff);
  17. }
  18. static unsigned long int
  19. roll_checksum(unsigned long int sum, unsigned char remove, unsigned char add, int len)
  20. {
  21. unsigned long int s1, s2;
  22. s1 = sum & 0xffff;
  23. s2 = sum >> 16;
  24. s1 -= remove - add;
  25. s2 -= len * (remove + 1) - s1;
  26. return ((s2 & 0xffff) << 16) | (s1 & 0xffff);
  27. }
  28. typedef struct {
  29. PyObject_HEAD
  30. int chunk_size, window_size, i, last, eof, done, buf_size, data_len, seed;
  31. PyObject *chunks, *fd;
  32. unsigned long int sum;
  33. unsigned char *data, add, remove;
  34. } ChunkifyIter;
  35. static PyObject*
  36. ChunkifyIter_iter(PyObject *self)
  37. {
  38. ChunkifyIter *c = (ChunkifyIter *)self;
  39. c->data_len = 0;
  40. c->done = 0;
  41. c->eof = 0;
  42. c->i = 0;
  43. c->sum = 0;
  44. c->last = 0;
  45. Py_INCREF(self);
  46. return self;
  47. }
  48. static void
  49. ChunkifyIter_dealloc(PyObject *self)
  50. {
  51. ChunkifyIter *c = (ChunkifyIter *)self;
  52. Py_DECREF(c->fd);
  53. free(c->data);
  54. self->ob_type->tp_free(self);
  55. }
  56. static PyObject*
  57. ChunkifyIter_iternext(PyObject *self)
  58. {
  59. ChunkifyIter *c = (ChunkifyIter *)self;
  60. int initial = c->window_size;
  61. if(c->done)
  62. {
  63. PyErr_SetNone(PyExc_StopIteration);
  64. return NULL;
  65. }
  66. for(;;)
  67. {
  68. if(c->i == c->buf_size)
  69. {
  70. int diff = c->last + 1 - c->window_size;
  71. assert(diff >= 0);
  72. memmove(c->data, c->data + diff, c->buf_size - diff);
  73. c->i -= diff;
  74. c->last -= diff;
  75. c->data_len -= diff;
  76. assert(c->i >= 0);
  77. assert(c->last >= -1);
  78. assert(c->data_len >= 0);
  79. }
  80. if(c->i == c->data_len)
  81. {
  82. PyObject *data = PyObject_CallMethod(c->fd, "read", "i", c->buf_size - c->data_len);
  83. int n = PyString_Size(data);
  84. memcpy(c->data + c->data_len, PyString_AsString(data), n);
  85. c->data_len += n;
  86. Py_DECREF(data);
  87. }
  88. if(c->i == c->data_len)
  89. {
  90. if(c->last < c->i) {
  91. c->done = 1;
  92. return PyString_FromStringAndSize((char *)(c->data + c->last),
  93. c->data_len - c->last);
  94. }
  95. PyErr_SetNone(PyExc_StopIteration);
  96. return NULL;
  97. }
  98. if(initial)
  99. {
  100. int bytes = MIN(initial, c->data_len - c->i);
  101. initial -= bytes;
  102. c->sum = checksum(c->data + c->i, bytes, 0);
  103. c->i += bytes;
  104. }
  105. else
  106. {
  107. c->sum = roll_checksum(c->sum,
  108. c->data[c->i - c->window_size],
  109. c->data[c->i],
  110. c->window_size);
  111. c->i++;
  112. }
  113. if((c->sum % c->chunk_size) == c->seed)
  114. {
  115. int old_last = c->last;
  116. c->last = c->i;
  117. return PyString_FromStringAndSize((char *)(c->data + old_last),
  118. c->last - old_last);
  119. }
  120. if(c->i == c->buf_size && c->last <= c->window_size)
  121. {
  122. int old_last = c->last;
  123. c->last = c->i;
  124. return PyString_FromStringAndSize((char *)(c->data + old_last),
  125. c->last - old_last);
  126. }
  127. }
  128. PyErr_SetNone(PyExc_StopIteration);
  129. return NULL;
  130. }
  131. static PyTypeObject ChunkifyIterType = {
  132. PyObject_HEAD_INIT(NULL)
  133. 0, /*ob_size*/
  134. "_chunkifier._ChunkifyIter", /*tp_name*/
  135. sizeof(ChunkifyIter), /*tp_basicsize*/
  136. 0, /*tp_itemsize*/
  137. ChunkifyIter_dealloc, /*tp_dealloc*/
  138. 0, /*tp_print*/
  139. 0, /*tp_getattr*/
  140. 0, /*tp_setattr*/
  141. 0, /*tp_compare*/
  142. 0, /*tp_repr*/
  143. 0, /*tp_as_number*/
  144. 0, /*tp_as_sequence*/
  145. 0, /*tp_as_mapping*/
  146. 0, /*tp_hash */
  147. 0, /*tp_call*/
  148. 0, /*tp_str*/
  149. 0, /*tp_getattro*/
  150. 0, /*tp_setattro*/
  151. 0, /*tp_as_buffer*/
  152. Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER,
  153. /* tp_flags: Py_TPFLAGS_HAVE_ITER tells python to
  154. use tp_iter and tp_iternext fields. */
  155. "", /* tp_doc */
  156. 0, /* tp_traverse */
  157. 0, /* tp_clear */
  158. 0, /* tp_richcompare */
  159. 0, /* tp_weaklistoffset */
  160. ChunkifyIter_iter, /* tp_iter: __iter__() method */
  161. ChunkifyIter_iternext /* tp_iternext: next() method */
  162. };
  163. static PyObject *
  164. chunkify(PyObject *self, PyObject *args)
  165. {
  166. PyObject *fd;
  167. int chunk_size, window_size, seed;
  168. ChunkifyIter *c;
  169. if (!PyArg_ParseTuple(args, "Oiii", &fd, &chunk_size, &window_size, &seed))
  170. {
  171. return NULL;
  172. }
  173. if (!(c = PyObject_New(ChunkifyIter, &ChunkifyIterType)))
  174. {
  175. return NULL;
  176. }
  177. PyObject_Init((PyObject *)c, &ChunkifyIterType);
  178. c->buf_size = 10 * 1024 * 1024;
  179. c->data = malloc(c->buf_size);
  180. c->fd = fd;
  181. c->chunk_size = chunk_size;
  182. c->window_size = window_size;
  183. c->seed = seed % chunk_size;
  184. Py_INCREF(fd);
  185. return (PyObject *)c;
  186. }
  187. static PyObject *
  188. py_checksum(PyObject *self, PyObject *args)
  189. {
  190. PyObject *data;
  191. unsigned long int sum = 0;
  192. if(!PyArg_ParseTuple(args, "O|k", &data, &sum)) return NULL;
  193. if(!PyString_Check(data))
  194. {
  195. PyErr_SetNone(PyExc_TypeError);
  196. return NULL;
  197. }
  198. return PyInt_FromLong(checksum((unsigned char *)PyString_AsString(data),
  199. PyString_Size(data), sum));
  200. }
  201. static PyObject *
  202. py_roll_checksum(PyObject *self, PyObject *args)
  203. {
  204. unsigned long int sum = 0, len, a, r;
  205. PyObject *add, *remove;
  206. if (!PyArg_ParseTuple(args, "kOOk", &sum, &remove, &add, &len)) return NULL;
  207. if(!PyString_Check(remove) || !PyString_Check(add) ||
  208. PyString_Size(remove) != 1 || PyString_Size(add) != 1)
  209. {
  210. PyErr_SetNone(PyExc_TypeError);
  211. return NULL;
  212. }
  213. a = *((const unsigned char *)PyString_AsString(add));
  214. r = *((const unsigned char *)PyString_AsString(remove));
  215. return PyInt_FromLong(roll_checksum(sum, r, a, len));
  216. }
  217. static PyMethodDef ChunkifierMethods[] = {
  218. {"chunkify", chunkify, METH_VARARGS, ""},
  219. {"checksum", py_checksum, METH_VARARGS, ""},
  220. {"roll_checksum", py_roll_checksum, METH_VARARGS, ""},
  221. {NULL, NULL, 0, NULL} /* Sentinel */
  222. };
  223. PyMODINIT_FUNC
  224. init_speedups(void)
  225. {
  226. PyObject* m;
  227. ChunkifyIterType.tp_new = PyType_GenericNew;
  228. if (PyType_Ready(&ChunkifyIterType) < 0) return;
  229. m = Py_InitModule("_speedups", ChunkifierMethods);
  230. Py_INCREF(&ChunkifyIterType);
  231. PyModule_AddObject(m, "_ChunkifyIter", (PyObject *)&ChunkifyIterType);
  232. }