bloom.go 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. package bloom
  2. import (
  3. "errors"
  4. "strconv"
  5. "github.com/tal-tech/go-zero/core/hash"
  6. "github.com/tal-tech/go-zero/core/stores/redis"
  7. )
  8. const (
  9. // for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
  10. // maps as k in the error rate table
  11. maps = 14
  12. setScript = `
  13. local key = KEYS[1]
  14. for _, offset in ipairs(ARGV) do
  15. redis.call("setbit", key, offset, 1)
  16. end
  17. `
  18. testScript = `
  19. local key = KEYS[1]
  20. for _, offset in ipairs(ARGV) do
  21. if tonumber(redis.call("getbit", key, offset)) == 0 then
  22. return false
  23. end
  24. end
  25. return true
  26. `
  27. )
  28. var ErrTooLargeOffset = errors.New("too large offset")
  29. type (
  30. BitSetProvider interface {
  31. check([]uint) (bool, error)
  32. set([]uint) error
  33. }
  34. BloomFilter struct {
  35. bits uint
  36. maps uint
  37. bitSet BitSetProvider
  38. }
  39. )
  40. // New create a BloomFilter, store is the backed redis, key is the key for the bloom filter,
  41. // bits is how many bits will be used, maps is how many hashes for each addition.
  42. // best practices:
  43. // elements - means how many actual elements
  44. // when maps = 14, formula: 0.7*(bits/maps), bits = 20*elements, the error rate is 0.000067 < 1e-4
  45. // for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
  46. func New(store *redis.Redis, key string, bits uint) *BloomFilter {
  47. return &BloomFilter{
  48. bits: bits,
  49. bitSet: newRedisBitSet(store, key, bits),
  50. }
  51. }
  52. func (f *BloomFilter) Add(data []byte) error {
  53. locations := f.getLocations(data)
  54. err := f.bitSet.set(locations)
  55. if err != nil {
  56. return err
  57. }
  58. return nil
  59. }
  60. func (f *BloomFilter) Exists(data []byte) (bool, error) {
  61. locations := f.getLocations(data)
  62. isSet, err := f.bitSet.check(locations)
  63. if err != nil {
  64. return false, err
  65. }
  66. if !isSet {
  67. return false, nil
  68. }
  69. return true, nil
  70. }
  71. func (f *BloomFilter) getLocations(data []byte) []uint {
  72. locations := make([]uint, maps)
  73. for i := uint(0); i < maps; i++ {
  74. hashValue := hash.Hash(append(data, byte(i)))
  75. locations[i] = uint(hashValue % uint64(f.bits))
  76. }
  77. return locations
  78. }
  79. type redisBitSet struct {
  80. store *redis.Redis
  81. key string
  82. bits uint
  83. }
  84. func newRedisBitSet(store *redis.Redis, key string, bits uint) *redisBitSet {
  85. return &redisBitSet{
  86. store: store,
  87. key: key,
  88. bits: bits,
  89. }
  90. }
  91. func (r *redisBitSet) buildOffsetArgs(offsets []uint) ([]string, error) {
  92. var args []string
  93. for _, offset := range offsets {
  94. if offset >= r.bits {
  95. return nil, ErrTooLargeOffset
  96. }
  97. args = append(args, strconv.FormatUint(uint64(offset), 10))
  98. }
  99. return args, nil
  100. }
  101. func (r *redisBitSet) check(offsets []uint) (bool, error) {
  102. args, err := r.buildOffsetArgs(offsets)
  103. if err != nil {
  104. return false, err
  105. }
  106. resp, err := r.store.Eval(testScript, []string{r.key}, args)
  107. if err == redis.Nil {
  108. return false, nil
  109. } else if err != nil {
  110. return false, err
  111. }
  112. if exists, ok := resp.(int64); !ok {
  113. return false, nil
  114. } else {
  115. return exists == 1, nil
  116. }
  117. }
  118. func (r *redisBitSet) del() error {
  119. _, err := r.store.Del(r.key)
  120. return err
  121. }
  122. func (r *redisBitSet) expire(seconds int) error {
  123. return r.store.Expire(r.key, seconds)
  124. }
  125. func (r *redisBitSet) set(offsets []uint) error {
  126. args, err := r.buildOffsetArgs(offsets)
  127. if err != nil {
  128. return err
  129. }
  130. _, err = r.store.Eval(setScript, []string{r.key}, args)
  131. if err == redis.Nil {
  132. return nil
  133. } else {
  134. return err
  135. }
  136. }