Prechádzať zdrojové kódy

Feature/trie ac automation (#1479)

* fix: trie ac automation issues

* fix: trie ac automation issues

* fix: trie ac automation issues

* fix: trie ac automation issues
Kevin Wan pred 3 rokmi
rodič
commit
b3e7d2901f

+ 36 - 37
core/stringx/node.go

@@ -36,32 +36,30 @@ func (n *node) add(word string) {
 }
 
 func (n *node) build() {
-	n.fail = n
+	var nodes []*node // FIFO queue of nodes whose children still need a fail link
 	for _, child := range n.children {
 		child.fail = n
-		n.buildNode(child)
+		nodes = append(nodes, child) // direct children of n fail back to n and seed the queue
 	}
-}
-
-func (n *node) buildNode(nd *node) {
-	if nd.children == nil {
-		return
-	}
-
-	var fifo []*node
-	for key, child := range nd.children {
-		fifo = append(fifo, child)
-
-		if fail, ok := nd.fail.children[key]; ok {
-			child.fail = fail
-		} else {
-			child.fail = n
+	for len(nodes) > 0 { // breadth-first: take from the front, append children at the back
+		nd := nodes[0]
+		nodes = nodes[1:]
+		for key, child := range nd.children {
+			nodes = append(nodes, child)
+			cur := nd // follow fail links upward from the parent until one has a child for key
+			for cur != nil {
+				if cur.fail == nil { // no further fail link (build no longer assigns n.fail): fall back to n
+					child.fail = n
+					break
+				}
+				if fail, ok := cur.fail.children[key]; ok { // the fail target has the same edge: link to that child
+					child.fail = fail
+					break
+				}
+				cur = cur.fail
+			}
 		}
 	}
-
-	for _, val := range fifo {
-		n.buildNode(val)
-	}
 }
 
 func (n *node) find(chars []rune) []scope {
@@ -73,27 +71,28 @@ func (n *node) find(chars []rune) []scope {
 		child, ok := cur.children[chars[i]]
 		if ok {
 			cur = child
-		} else if cur == n {
-			continue
 		} else {
-			cur = cur.fail
-			if child, ok = cur.children[chars[i]]; !ok {
+			for cur != n {
+				cur = cur.fail
+				if child, ok = cur.children[chars[i]]; ok {
+					cur = child
+					break
+				}
+			}
+
+			if child == nil {
 				continue
 			}
-			cur = child
 		}
 
-		if child.end {
-			scopes = append(scopes, scope{
-				start: i + 1 - child.depth,
-				stop:  i + 1,
-			})
-		}
-		if child.fail != n && child.fail.end {
-			scopes = append(scopes, scope{
-				start: i + 1 - child.fail.depth,
-				stop:  i + 1,
-			})
+		for child != n {
+			if child.end {
+				scopes = append(scopes, scope{
+					start: i + 1 - child.depth,
+					stop:  i + 1,
+				})
+			}
+			child = child.fail
 		}
 	}
 

+ 87 - 0
core/stringx/node_fuzz_test.go

@@ -0,0 +1,87 @@
+//go:build go1.18
+// +build go1.18
+
+package stringx
+
+import (
+	"fmt"
+	"math/rand"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// FuzzNodeFind cross-checks node.find against a strings.Index reference.
+// Every run generates a text with Randn, registers keywords that do not occur
+// in the text together with keywords sliced out of it, and requires find to
+// return exactly the occurrences located for the sliced keywords.
+func FuzzNodeFind(f *testing.F) {
+	rand.Seed(time.Now().UnixNano())
+
+	f.Add(10)
+	f.Fuzz(func(t *testing.T, keys int) {
+		str := Randn(rand.Intn(100) + 50)
+		// Decoys: up to keys generated keywords, kept only when absent from
+		// the text, so they contribute no expected scope.
+		keywords := make(map[string]struct{})
+		for i := 0; i < keys; i++ {
+			keyword := Randn(rand.Intn(10) + 5)
+			if !strings.Contains(str, keyword) {
+				keywords[keyword] = struct{}{}
+			}
+		}
+
+		size := len(str)
+		var scopes []scope
+		var n node
+		// Slice size%20 substrings (at most 20 bytes each) out of the text to
+		// serve as keywords that are known to match.
+		for i := 0; i < size%20; i++ {
+			start := rand.Intn(size)
+			stop := start + rand.Intn(20) + 1
+			if stop > size {
+				stop = size
+			}
+			if start == stop {
+				continue
+			}
+
+			keyword := str[start:stop]
+			if _, ok := keywords[keyword]; ok {
+				continue
+			}
+
+			keywords[keyword] = struct{}{}
+			// Record every occurrence, overlapping ones included: the search
+			// resumes one byte after the start of the previous match.
+			var pos int
+			for pos <= len(str)-len(keyword) {
+				val := str[pos:]
+				p := strings.Index(val, keyword)
+				if p < 0 {
+					break
+				}
+
+				scopes = append(scopes, scope{
+					start: pos + p,
+					stop:  pos + p + len(keyword),
+				})
+				pos += p + 1
+			}
+		}
+
+		for keyword := range keywords {
+			n.add(keyword)
+		}
+		n.build()
+
+		// Render the whole case so that a failure can be replayed by hand.
+		var buf strings.Builder
+		buf.WriteString("keywords:\n")
+		for key := range keywords {
+			fmt.Fprintf(&buf, "\t%q,\n", key)
+		}
+		buf.WriteString("scopes:\n")
+		for _, scp := range scopes {
+			fmt.Fprintf(&buf, "\t{%d, %d},\n", scp.start, scp.stop)
+		}
+		fmt.Fprintf(&buf, "text:\n\t%s\n", str)
+		defer func() {
+			if r := recover(); r != nil {
+				// The report is data, not a format string, so use Error
+				// rather than Errorf.
+				t.Error(buf.String())
+			}
+		}()
+		// Pass the report through an explicit "%s" so that it is never
+		// interpreted as a format string.
+		assert.ElementsMatchf(t, scopes, n.find([]rune(str)), "%s", buf.String())
+	})
+}

+ 171 - 1
core/stringx/node_test.go

@@ -1,6 +1,176 @@
 package stringx
 
-import "testing"
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestFuzzNodeCase1(t *testing.T) {
+	keywords := []string{
+		"cs8Zh",
+		"G1OihlVuBz",
+		"K6azS2FBHjI",
+		"DQKvghI4",
+		"l7bA86Sze",
+		"tjBLZhCao",
+		"nEsXmVzP",
+		"cbRh8UE1nO3s",
+		"Wta3R2WcbGP",
+		"jpOIcA",
+		"TtkRr4k9hI",
+		"OKbSo0clAYTtk",
+		"uJs1WToEanlKV",
+		"05Y02iFD2",
+		"x2uJs1WToEanlK",
+		"ieaSWe",
+		"Kg",
+		"FD2bCKFazH",
+	}
+	scopes := []scope{
+		{62, 72},
+		{52, 65},
+		{21, 34},
+		{1, 10},
+		{19, 33},
+		{36, 42},
+		{42, 44},
+		{7, 17},
+	}
+	n := new(node)
+	for _, key := range keywords {
+		n.add(key)
+	}
+	n.build()
+	assert.ElementsMatch(t, scopes, n.find([]rune("Z05Y02iFD2bCKFazHtrx2uJs1WToEanlKVWKieaSWeKgmnUXV0ZjOKbSo0clAYTtkRr4k9hI")))
+}
+
+func TestFuzzNodeCase2(t *testing.T) {
+	keywords := []string{
+		"IP1NPsJKIvt",
+		"Iw7hQARwSTw",
+		"SmZIcA",
+		"OyxHPYkoQzFO",
+		"3suCnuSAS5d",
+		"HUMpbi",
+		"HPdvbGGpY",
+		"49qjMtR8bEt",
+		"a0zrrGKVTJ2",
+		"WbOBcszeo1FfZ",
+		"8tHUi5PJI",
+		"Oa2Di",
+		"6ZWa5nr1tU",
+		"o0LJRfmeXB9bF9",
+		"veF0ehKxH",
+		"Qp73r",
+		"B6Rmds4ELY8",
+		"uNpGybQZG",
+		"Ogm3JqicRZlA4n",
+		"FL6LVErKomc84H",
+		"qv2Pi0xJj3cR1",
+		"bPWLBg4",
+		"hYN8Q4M1sw",
+		"ExkTgNklmlIx",
+		"eVgHHDOxOUEj",
+		"5WPEVv0tR",
+		"CPjnOAqUZgV",
+		"oR3Ogtz",
+		"jwk1Zbg",
+		"DYqguyk8h",
+		"rieroDmpvYFK",
+		"MQ9hZnMjDqrNQe",
+		"EhM4KqkCBd",
+		"m9xalj6q",
+		"d5CTL5mzK",
+		"XJOoTvFtI8U",
+		"iFAwspJ",
+		"iGv8ErnRZIuSWX",
+		"i8C1BqsYX",
+		"vXN1KOaOgU",
+		"GHJFB",
+		"Y6OlAqbZxYG8",
+		"dzd4QscSih4u",
+		"SsLYMkKvB9APx",
+		"gi0huB3",
+		"CMICHDCSvSrgiACXVkN",
+		"MwOvyHbaxdaqpZpU",
+		"wOvyHbaxdaqpZpUbI",
+		"2TT5WEy",
+		"eoCq0T2MC",
+		"ZpUbI7",
+		"oCq0T2MCp",
+		"CpLFgLg0g",
+		"FgLg0gh",
+		"w5awC5HeoCq",
+		"1c",
+	}
+	scopes := []scope{
+		{0, 19},
+		{57, 73},
+		{58, 75},
+		{47, 54},
+		{29, 38},
+		{70, 76},
+		{30, 39},
+		{37, 46},
+		{40, 47},
+		{22, 33},
+		{92, 94},
+	}
+	n := new(node)
+	for _, key := range keywords {
+		n.add(key)
+	}
+	n.build()
+	assert.ElementsMatch(t, scopes, n.find([]rune("CMICHDCSvSrgiACXVkNF9lw5awC5HeoCq0T2MCpLFgLg0gh2TT5WEyINrMwOvyHbaxdaqpZpUbI7SpIY5yVWf33MuX7K1c")))
+}
+
+func TestFuzzNodeCase3(t *testing.T) {
+	keywords := []string{
+		"QAraACKOftI4",
+		"unRmd2EO0",
+		"s25OtuoU",
+		"aGlmn7KnbE4HCX",
+		"kuK6Uh",
+		"ckuK6Uh",
+		"uK6Uh",
+		"Iy",
+		"h",
+		"PMSSUNvyi",
+		"ahz0i",
+		"Lhs4XZ1e",
+		"shPp1Va7aQNVme",
+		"yIUckuK6Uh",
+		"pKjIyI",
+		"jIyIUckuK6Uh",
+		"UckuK6Uh",
+		"Uh",
+		"JPAULjQgHJ",
+		"Wp",
+		"sbkZxXurrI",
+		"pKjIyIUckuK6Uh",
+	}
+	scopes := []scope{
+		{9, 15},
+		{8, 15},
+		{5, 15},
+		{1, 7},
+		{10, 15},
+		{3, 15},
+		{0, 2},
+		{1, 15},
+		{7, 15},
+		{13, 15},
+		{4, 6},
+		{14, 15},
+	}
+	n := new(node)
+	for _, key := range keywords {
+		n.add(key)
+	}
+	n.build()
+	assert.ElementsMatch(t, scopes, n.find([]rune("WpKjIyIUckuK6Uh")))
+}
 
 func BenchmarkNodeFind(b *testing.B) {
 	b.ReportAllocs()

+ 2 - 2
core/stringx/replacer_test.go

@@ -83,7 +83,7 @@ func TestReplacer_ReplaceEmpty(t *testing.T) {
 	assert.Equal(t, "", NewReplacer(mapping).Replace(""))
 }
 
-func TestFuzzCase1(t *testing.T) {
+func TestFuzzReplacerCase1(t *testing.T) {
 	keywords := map[string]string{
 		"yQyJykiqoh":     "xw",
 		"tgN70z":         "Q2P",
@@ -115,7 +115,7 @@ func TestFuzzCase1(t *testing.T) {
 	}
 }
 
-func TestFuzzCase2(t *testing.T) {
+func TestFuzzReplacerCase2(t *testing.T) {
 	keywords := map[string]string{
 		"dmv2SGZvq9Yz":   "TE",
 		"rCL5DRI9uFP8":   "hvsc8",