浏览代码

fix: replace should replace the longest match

hudahai 2 年之前
父节点
当前提交
d9a732a273
共有 3 个文件被更改,包括 73 次插入61 次删除
  1. 42 0
      core/stringx/node.go
  2. 22 61
      core/stringx/replacer.go
  3. 9 0
      core/stringx/replacer_test.go

+ 42 - 0
core/stringx/node.go

@@ -98,3 +98,45 @@ func (n *node) find(chars []rune) []scope {
 
 	return scopes
 }
+
+func (n *node) longestMatch(chars []rune, start int) (used int, jump *node, matched bool) {
+	cur := n
+	var matchedNode *node
+	for i := start; i < len(chars); i++ {
+		child, ok := cur.children[chars[i]]
+		if ok {
+			cur = child
+			if cur.end {
+				matchedNode = cur
+			}
+		} else {
+			if matchedNode != nil {
+				return matchedNode.depth, nil, true
+			}
+			if n.end {
+				return start, nil, true
+			}
+			var jump *node
+			for cur.fail != nil {
+				jump, ok = cur.fail.children[chars[i]]
+				if ok {
+					break
+				}
+				cur = cur.fail
+			}
+			if jump != nil {
+				return i + 1 - jump.depth, jump, false
+			}
+			return i + 1, nil, false
+		}
+	}
+	// input exhausted: report the longest match recorded during the scan, if any
+	if matchedNode != nil {
+		return matchedNode.depth, nil, true
+	}
+	// no longer match was found, but the starting node itself is an end node
+	if n.end {
+		return start, nil, true
+	}
+	return len(chars), nil, false
+}

+ 22 - 61
core/stringx/replacer.go

@@ -1,6 +1,8 @@
 package stringx
 
-import "strings"
+import (
+	"bytes"
+)
 
 type (
 	// Replacer interface wraps the Replace method.
@@ -30,68 +32,27 @@ func NewReplacer(mapping map[string]string) Replacer {
 
 // Replace replaces text with given substitutes.
 func (r *replacer) Replace(text string) string {
-	var builder strings.Builder
-	var start int
-	chars := []rune(text)
-	size := len(chars)
-
-	for start < size {
-		cur := r.node
-
-		if start > 0 {
-			builder.WriteString(string(chars[:start]))
-		}
-
-		for i := start; i < size; i++ {
-			child, ok := cur.children[chars[i]]
-			if ok {
-				cur = child
-			} else if cur == r.node {
-				builder.WriteRune(chars[i])
-				// cur already points to root, set start only
-				start = i + 1
-				continue
+	var buf bytes.Buffer
+	target := []rune(text)
+	cur := r.node
+	nextStart := 0
+	for len(target) != 0 {
+		used, jump, matched := cur.longestMatch(target, nextStart)
+		if matched {
+			replaced := r.mapping[string(target[:used])]
+			target = append([]rune(replaced), target[used:]...)
+			cur = r.node
+		} else {
+			buf.WriteString(string(target[:used]))
+			target = target[used:]
+			if jump != nil {
+				cur = jump
+				nextStart = jump.depth
 			} else {
-				curDepth := cur.depth
-				cur = cur.fail
-				child, ok = cur.children[chars[i]]
-				if !ok {
-					// write this path
-					builder.WriteString(string(chars[i-curDepth : i+1]))
-					// go to root
-					cur = r.node
-					start = i + 1
-					continue
-				}
-
-				failDepth := cur.depth
-				// write path before jump
-				builder.WriteString(string(chars[start : start+curDepth-failDepth]))
-				start += curDepth - failDepth
-				cur = child
-			}
-
-			if cur.end {
-				val := string(chars[i+1-cur.depth : i+1])
-				builder.WriteString(r.mapping[val])
-				builder.WriteString(string(chars[i+1:]))
-				// only matching this path, all previous paths are done
-				if start >= i+1-cur.depth && i+1 >= size {
-					return builder.String()
-				}
-
-				chars = []rune(builder.String())
-				size = len(chars)
-				builder.Reset()
-				break
+				cur = r.node
+				nextStart = 0
 			}
 		}
-
-		if !cur.end {
-			builder.WriteString(string(chars[start:]))
-			return builder.String()
-		}
 	}
-
-	return string(chars)
+	return buf.String()
 }

+ 9 - 0
core/stringx/replacer_test.go

@@ -51,6 +51,15 @@ func TestReplacer_ReplaceMultiMatches(t *testing.T) {
 	assert.Equal(t, "零一23四五一23四五", NewReplacer(mapping).Replace("零一二三四五一二三四五"))
 }
 
+func TestReplacer_ReplaceLongestMatching(t *testing.T) {
+	keywords := map[string]string{
+		"日本":    "japan",
+		"日本的首都": "东京",
+	}
+	replacer := NewReplacer(keywords)
+	assert.Equal(t, "东京在japan", replacer.Replace("日本的首都在日本"))
+}
+
 func TestReplacer_ReplaceJumpToFail(t *testing.T) {
 	mapping := map[string]string{
 		"bcdf": "1235",