bloom.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. package bloom
  2. import (
  3. "context"
  4. "errors"
  5. "strconv"
  6. "github.com/wuntsong-org/go-zero-plus/core/hash"
  7. "github.com/wuntsong-org/go-zero-plus/core/stores/redis"
  8. )
  9. // for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
  10. // maps as k in the error rate table
  11. const maps = 14
  12. var (
  13. // ErrTooLargeOffset indicates the offset is too large in bitset.
  14. ErrTooLargeOffset = errors.New("too large offset")
  15. setScript = redis.NewScript(`
  16. for _, offset in ipairs(ARGV) do
  17. redis.call("setbit", KEYS[1], offset, 1)
  18. end
  19. `)
  20. testScript = redis.NewScript(`
  21. for _, offset in ipairs(ARGV) do
  22. if tonumber(redis.call("getbit", KEYS[1], offset)) == 0 then
  23. return false
  24. end
  25. end
  26. return true
  27. `)
  28. )
  29. type (
  30. // A Filter is a bloom filter.
  31. Filter struct {
  32. bits uint
  33. bitSet bitSetProvider
  34. }
  35. bitSetProvider interface {
  36. check(ctx context.Context, offsets []uint) (bool, error)
  37. set(ctx context.Context, offsets []uint) error
  38. }
  39. )
  40. // New create a Filter, store is the backed redis, key is the key for the bloom filter,
  41. // bits is how many bits will be used, maps is how many hashes for each addition.
  42. // best practices:
  43. // elements - means how many actual elements
  44. // when maps = 14, formula: 0.7*(bits/maps), bits = 20*elements, the error rate is 0.000067 < 1e-4
  45. // for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
  46. func New(store *redis.Redis, key string, bits uint) *Filter {
  47. return &Filter{
  48. bits: bits,
  49. bitSet: newRedisBitSet(store, key, bits),
  50. }
  51. }
  52. // Add adds data into f.
  53. func (f *Filter) Add(data []byte) error {
  54. return f.AddCtx(context.Background(), data)
  55. }
  56. // AddCtx adds data into f with context.
  57. func (f *Filter) AddCtx(ctx context.Context, data []byte) error {
  58. locations := f.getLocations(data)
  59. return f.bitSet.set(ctx, locations)
  60. }
  61. // Exists checks if data is in f.
  62. func (f *Filter) Exists(data []byte) (bool, error) {
  63. return f.ExistsCtx(context.Background(), data)
  64. }
  65. // ExistsCtx checks if data is in f with context.
  66. func (f *Filter) ExistsCtx(ctx context.Context, data []byte) (bool, error) {
  67. locations := f.getLocations(data)
  68. isSet, err := f.bitSet.check(ctx, locations)
  69. if err != nil {
  70. return false, err
  71. }
  72. return isSet, nil
  73. }
  74. func (f *Filter) getLocations(data []byte) []uint {
  75. locations := make([]uint, maps)
  76. for i := uint(0); i < maps; i++ {
  77. hashValue := hash.Hash(append(data, byte(i)))
  78. locations[i] = uint(hashValue % uint64(f.bits))
  79. }
  80. return locations
  81. }
  82. type redisBitSet struct {
  83. store *redis.Redis
  84. key string
  85. bits uint
  86. }
  87. func newRedisBitSet(store *redis.Redis, key string, bits uint) *redisBitSet {
  88. return &redisBitSet{
  89. store: store,
  90. key: key,
  91. bits: bits,
  92. }
  93. }
  94. func (r *redisBitSet) buildOffsetArgs(offsets []uint) ([]string, error) {
  95. var args []string
  96. for _, offset := range offsets {
  97. if offset >= r.bits {
  98. return nil, ErrTooLargeOffset
  99. }
  100. args = append(args, strconv.FormatUint(uint64(offset), 10))
  101. }
  102. return args, nil
  103. }
  104. func (r *redisBitSet) check(ctx context.Context, offsets []uint) (bool, error) {
  105. args, err := r.buildOffsetArgs(offsets)
  106. if err != nil {
  107. return false, err
  108. }
  109. resp, err := r.store.ScriptRunCtx(ctx, testScript, []string{r.key}, args)
  110. if err == redis.Nil {
  111. return false, nil
  112. } else if err != nil {
  113. return false, err
  114. }
  115. exists, ok := resp.(int64)
  116. if !ok {
  117. return false, nil
  118. }
  119. return exists == 1, nil
  120. }
  121. // del only use for testing.
  122. func (r *redisBitSet) del() error {
  123. _, err := r.store.Del(r.key)
  124. return err
  125. }
  126. // expire only use for testing.
  127. func (r *redisBitSet) expire(seconds int) error {
  128. return r.store.Expire(r.key, seconds)
  129. }
  130. func (r *redisBitSet) set(ctx context.Context, offsets []uint) error {
  131. args, err := r.buildOffsetArgs(offsets)
  132. if err != nil {
  133. return err
  134. }
  135. _, err = r.store.ScriptRunCtx(ctx, setScript, []string{r.key}, args)
  136. if err == redis.Nil {
  137. return nil
  138. }
  139. return err
  140. }