hash_sizes.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. """
  2. Compute hashtable sizes with nices properties
  3. - prime sizes (for small to medium sizes)
  4. - 2 prime-factor sizes (for big sizes)
  5. - fast growth for small sizes
  6. - slow growth for big sizes
  7. Note:
  8. this is just a tool for developers.
  9. within borgbackup, it is just used to generate hash_sizes definition for _hashindex.c.
  10. """
  11. from collections import namedtuple
  12. K, M, G = 2**10, 2**20, 2**30
  13. # hash table size (in number of buckets)
  14. start, end_p1, end_p2 = 1 * K, 127 * M, 2 * G - 10 * M # stay well below 2^31 - 1
  15. Policy = namedtuple("Policy", "upto grow")
  16. policies = [
  17. # which growth factor to use when growing a hashtable of size < upto
  18. # grow fast (*2.0) at the start so we do not have to resize too often (expensive).
  19. # grow slow (*1.1) for huge hash tables (do not jump too much in memory usage)
  20. Policy(256 * K, 2.0),
  21. Policy(2 * M, 1.7),
  22. Policy(16 * M, 1.4),
  23. Policy(128 * M, 1.2),
  24. Policy(2 * G - 1, 1.1),
  25. ]
  26. # slightly modified version of:
  27. # http://www.macdevcenter.com/pub/a/python/excerpt/pythonckbk_chap1/index1.html?page=2
  28. def eratosthenes():
  29. """Yields the sequence of prime numbers via the Sieve of Eratosthenes."""
  30. D = {} # map each composite integer to its first-found prime factor
  31. q = 2 # q gets 2, 3, 4, 5, ... ad infinitum
  32. while True:
  33. p = D.pop(q, None)
  34. if p is None:
  35. # q not a key in D, so q is prime, therefore, yield it
  36. yield q
  37. # mark q squared as not-prime (with q as first-found prime factor)
  38. D[q * q] = q
  39. else:
  40. # let x <- smallest (N*p)+q which wasn't yet known to be composite
  41. # we just learned x is composite, with p first-found prime factor,
  42. # since p is the first-found prime factor of q -- find and mark it
  43. x = p + q
  44. while x in D:
  45. x += p
  46. D[x] = p
  47. q += 1
  48. def two_prime_factors(pfix=65537):
  49. """Yields numbers with 2 prime factors pfix and p."""
  50. for p in eratosthenes():
  51. yield pfix * p
  52. def get_grow_factor(size):
  53. for p in policies:
  54. if size < p.upto:
  55. return p.grow
  56. def find_bigger_prime(gen, i):
  57. while True:
  58. p = next(gen)
  59. if p >= i:
  60. return p
  61. def main():
  62. sizes = []
  63. i = start
  64. gen = eratosthenes()
  65. while i < end_p1:
  66. grow_factor = get_grow_factor(i)
  67. p = find_bigger_prime(gen, i)
  68. sizes.append(p)
  69. i = int(i * grow_factor)
  70. gen = two_prime_factors() # for lower ram consumption
  71. while i < end_p2:
  72. grow_factor = get_grow_factor(i)
  73. p = find_bigger_prime(gen, i)
  74. sizes.append(p)
  75. i = int(i * grow_factor)
  76. print(
  77. """\
  78. static int hash_sizes[] = {
  79. %s
  80. };
  81. """
  82. % ", ".join(str(size) for size in sizes)
  83. )
  84. if __name__ == "__main__":
  85. main()