// trie_test.go (3.2 KB)

  1. package stringx
  2. import (
  3. "testing"
  4. "github.com/stretchr/testify/assert"
  5. )
  6. func TestTrieSimple(t *testing.T) {
  7. trie := NewTrie([]string{
  8. "bc",
  9. "cd",
  10. })
  11. output, keywords, found := trie.Filter("abcd")
  12. assert.True(t, found)
  13. assert.Equal(t, "a***", output)
  14. assert.ElementsMatch(t, []string{"bc", "cd"}, keywords)
  15. }
  16. func TestTrie(t *testing.T) {
  17. tests := []struct {
  18. input string
  19. output string
  20. keywords []string
  21. found bool
  22. }{
  23. {
  24. input: "日本AV演员兼电视、电影演员。无名氏AV女优是xx出道, 日本AV女优们最精彩的表演是AV演员色情表演",
  25. output: "日本****兼电视、电影演员。*****女优是xx出道, ******们最精彩的表演是******表演",
  26. keywords: []string{
  27. "AV演员",
  28. "无名氏",
  29. "AV",
  30. "日本AV女优",
  31. "AV演员色情",
  32. },
  33. found: true,
  34. },
  35. {
  36. input: "完全和谐的文本完全和谐的文本",
  37. output: "完全和谐的文本完全和谐的文本",
  38. keywords: nil,
  39. found: false,
  40. },
  41. {
  42. input: "就一个字不对",
  43. output: "就*个字不对",
  44. keywords: []string{
  45. "一",
  46. },
  47. found: true,
  48. },
  49. {
  50. input: "就一对, AV",
  51. output: "就*对, **",
  52. keywords: []string{
  53. "一",
  54. "AV",
  55. },
  56. found: true,
  57. },
  58. {
  59. input: "就一不对, AV",
  60. output: "就**对, **",
  61. keywords: []string{
  62. "一",
  63. "一不",
  64. "AV",
  65. },
  66. found: true,
  67. },
  68. {
  69. input: "就对, AV",
  70. output: "就对, **",
  71. keywords: []string{
  72. "AV",
  73. },
  74. found: true,
  75. },
  76. {
  77. input: "就对, 一不",
  78. output: "就对, **",
  79. keywords: []string{
  80. "一",
  81. "一不",
  82. },
  83. found: true,
  84. },
  85. {
  86. input: "",
  87. output: "",
  88. keywords: nil,
  89. found: false,
  90. },
  91. }
  92. trie := NewTrie([]string{
  93. "", // no hurts for empty keywords
  94. "一",
  95. "一不",
  96. "AV",
  97. "AV演员",
  98. "无名氏",
  99. "AV演员色情",
  100. "日本AV女优",
  101. })
  102. for _, test := range tests {
  103. t.Run(test.input, func(t *testing.T) {
  104. output, keywords, ok := trie.Filter(test.input)
  105. assert.Equal(t, test.found, ok)
  106. assert.Equal(t, test.output, output)
  107. assert.ElementsMatch(t, test.keywords, keywords)
  108. keywords = trie.FindKeywords(test.input)
  109. assert.ElementsMatch(t, test.keywords, keywords)
  110. })
  111. }
  112. }
  113. func TestTrieSingleWord(t *testing.T) {
  114. trie := NewTrie([]string{
  115. "闹",
  116. }, WithMask('#'))
  117. output, keywords, ok := trie.Filter("今晚真热闹")
  118. assert.ElementsMatch(t, []string{"闹"}, keywords)
  119. assert.True(t, ok)
  120. assert.Equal(t, "今晚真热#", output)
  121. }
  122. func TestTrieOverlap(t *testing.T) {
  123. trie := NewTrie([]string{
  124. "一二三四五",
  125. "二三四五六七八",
  126. }, WithMask('#'))
  127. output, keywords, ok := trie.Filter("零一二三四五六七八九十")
  128. assert.ElementsMatch(t, []string{
  129. "一二三四五",
  130. "二三四五六七八",
  131. }, keywords)
  132. assert.True(t, ok)
  133. assert.Equal(t, "零########九十", output)
  134. }
  135. func TestTrieNested(t *testing.T) {
  136. trie := NewTrie([]string{
  137. "一二三",
  138. "一二三四五",
  139. "一二三四五六七八",
  140. }, WithMask('#'))
  141. output, keywords, ok := trie.Filter("零一二三四五六七八九十")
  142. assert.ElementsMatch(t, []string{
  143. "一二三",
  144. "一二三四五",
  145. "一二三四五六七八",
  146. }, keywords)
  147. assert.True(t, ok)
  148. assert.Equal(t, "零########九十", output)
  149. }