trie.go 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. package stringx
  2. import "github.com/wuntsong-org/go-zero-plus/core/lang"
  3. const defaultMask = '*'
  4. type (
  5. // TrieOption defines the method to customize a Trie.
  6. TrieOption func(trie *trieNode)
  7. // A Trie is a tree implementation that used to find elements rapidly.
  8. Trie interface {
  9. Filter(text string) (string, []string, bool)
  10. FindKeywords(text string) []string
  11. }
  12. trieNode struct {
  13. node
  14. mask rune
  15. }
  16. scope struct {
  17. start int
  18. stop int
  19. }
  20. )
  21. // NewTrie returns a Trie.
  22. func NewTrie(words []string, opts ...TrieOption) Trie {
  23. n := new(trieNode)
  24. for _, opt := range opts {
  25. opt(n)
  26. }
  27. if n.mask == 0 {
  28. n.mask = defaultMask
  29. }
  30. for _, word := range words {
  31. n.add(word)
  32. }
  33. n.build()
  34. return n
  35. }
  36. func (n *trieNode) Filter(text string) (sentence string, keywords []string, found bool) {
  37. chars := []rune(text)
  38. if len(chars) == 0 {
  39. return text, nil, false
  40. }
  41. scopes := n.find(chars)
  42. keywords = n.collectKeywords(chars, scopes)
  43. for _, match := range scopes {
  44. // we don't care about overlaps, not bringing a performance improvement
  45. n.replaceWithAsterisk(chars, match.start, match.stop)
  46. }
  47. return string(chars), keywords, len(keywords) > 0
  48. }
  49. func (n *trieNode) FindKeywords(text string) []string {
  50. chars := []rune(text)
  51. if len(chars) == 0 {
  52. return nil
  53. }
  54. scopes := n.find(chars)
  55. return n.collectKeywords(chars, scopes)
  56. }
  57. func (n *trieNode) collectKeywords(chars []rune, scopes []scope) []string {
  58. set := make(map[string]lang.PlaceholderType)
  59. for _, v := range scopes {
  60. set[string(chars[v.start:v.stop])] = lang.Placeholder
  61. }
  62. var i int
  63. keywords := make([]string, len(set))
  64. for k := range set {
  65. keywords[i] = k
  66. i++
  67. }
  68. return keywords
  69. }
  70. func (n *trieNode) replaceWithAsterisk(chars []rune, start, stop int) {
  71. for i := start; i < stop; i++ {
  72. chars[i] = n.mask
  73. }
  74. }
  75. // WithMask customizes a Trie with keywords masked as given mask char.
  76. func WithMask(mask rune) TrieOption {
  77. return func(n *trieNode) {
  78. n.mask = mask
  79. }
  80. }