package utils import ( "errors" "sync" ) var ( dfaInstance map[string]*DFAInstance mu sync.RWMutex ) type DFAInstance struct { DFA *DFA BuffSize int } // GetDFA returns the singleton instance of DFA func GetDFA(kbID string) *DFAInstance { mu.RLock() defer mu.RUnlock() return dfaInstance[kbID] } // InitDFA Initialize a new DFA. --> this func used by pro func InitDFA(kbID string, words []string) { mu.Lock() defer mu.Unlock() newDFA := &DFA{ Root: NewTrieNode(), } var BuffSize int // 默认为0 for _, word := range words { newDFA.AddWord(word) if BuffSize < len([]rune(word)) { BuffSize = len([]rune(word)) } } if dfaInstance == nil { dfaInstance = make(map[string]*DFAInstance) } dfaInstance[kbID] = &DFAInstance{ DFA: newDFA, BuffSize: BuffSize, } } // TrieNode Define the nodes of DFA type TrieNode struct { Children map[rune]*TrieNode IsEnd bool } // NewTrieNode Create a new Trie node func NewTrieNode() *TrieNode { return &TrieNode{ Children: make(map[rune]*TrieNode), IsEnd: false, } } // DFA The structure contains the root node of the DFA type DFA struct { Root *TrieNode } // AddWord Add sensitive words to DFA func (d *DFA) AddWord(word string) { node := d.Root for _, char := range word { if _, exists := node.Children[char]; !exists { node.Children[char] = NewTrieNode() } node = node.Children[char] } node.IsEnd = true } // UpdateOldWord update old word func (d *DFA) UpdateOldWord(oldWord, newWord string) { d.DeleteWord(oldWord) d.AddWord(newWord) } // DeleteWord delete word func (d *DFA) DeleteWord(word string) bool { result := []rune(word) // 辅助函数用于递归删除节点 var deleteNode func(node *TrieNode, index int) bool deleteNode = func(node *TrieNode, index int) bool { if index == len(result) { // 如果该词不存在,直接返回 if !node.IsEnd { return false } // 清除该词的结束标记 node.IsEnd = false // 如果该节点没有子节点,可以删除 return len(node.Children) == 0 } char := result[index] child, exists := node.Children[char] if !exists { return false // 如果路径不存在,则不做任何操作 } // 递归删除子节点 shouldDeleteChild := deleteNode(child, index+1) if shouldDeleteChild { // 删除当前节点的子节点 delete(node.Children, char) // 如果当前节点没有其他子节点且不是词尾节点,返回 true return len(node.Children) == 0 && !node.IsEnd } return false } // 调用递归函数删除指定的词 return deleteNode(d.Root, 0) } // DeleteWordBatch delete word batch func (d *DFA) DeleteWordBatch(words []string) { wg := sync.WaitGroup{} for _, word := range words { wg.Add(1) go func() { d.DeleteWord(word) wg.Done() }() } wg.Wait() } // Filter the input text and replace sensitive words func (d *DFA) Filter(text string) string { result := []rune(text) // 转化为rune for i := 0; i < len(result); i++ { // 外层循环,遍历每个字符作为起始点 node := d.Root j := i for j < len(result) { // 内层循环,尝试匹配敏感词 if nextNode, exists := node.Children[result[j]]; exists { // 如果当前字符在子节点中存在 node = nextNode // 下移 if node.IsEnd { // 是否为结尾,即匹配到敏感词,替换为* for k := i; k <= j; k++ { result[k] = '🚫' } } j++ // next char } else { break } } } return string(result) } // Check if the input text contains sensitive words func (d *DFA) Check(text string) error { result := []rune(text) for i := 0; i < len(result); { node := d.Root start := i matched := false for j := i; j < len(result); j++ { char := result[j] if nextNode, exists := node.Children[char]; exists { node = nextNode if node.IsEnd { return errors.New("包含敏感词: " + string(result[start:j+1])) } } else { break } } if !matched { i++ } } return nil }