Skip to content

Commit b43d099

Browse files
committed
新增FindBestMatch接口
1 parent 7dc4c90 commit b43d099

File tree

4 files changed

+70
-42
lines changed

4 files changed

+70
-42
lines changed

README.md

+5
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ strsim是golang实现的字符串相似度库,后端集成多种算法,主
1818
## 内容
1919
- [比较两个字符串相似度](#比较两个字符串相识度)
2020
- [从字符串数组里面找到相似度最高的字符串](#从数组里找到相似度最高的字符串)
21+
- [从字符串数组里面找到相似度最高的字符串-带下标](#从数组里找到相似度最高的字符串-带下标)
2122
- [选择不同算法](#选择不同算法)
2223
- [莱文斯坦-编辑距离(Levenshtein)](#莱文斯坦-编辑距离(Levenshtein))
2324
- [选择Dice's coefficient](#选择Dice's-coefficient)
@@ -33,6 +34,10 @@ strsim.Compare("中国人", "中")
3334
```go
3435
strsim.FindBestMatchOne("海刘", []string{"白日依山尽", "黄河入海流", "欲穷千里目", "更上一层楼"})
3536
```
37+
## 从数组里找到相似度最高的字符串-带下标
38+
```go
39+
strsim.FindBestMatch("海刘", []string{"白日依山尽", "黄河入海流", "欲穷千里目", "更上一层楼"})
40+
```
3641

3742
## 选择不同算法
3843
### 莱文斯坦-编辑距离(Levenshtein)

strsim.go

+3-42
Original file line numberDiff line numberDiff line change
@@ -19,46 +19,7 @@ func FindBestMatchOne(s string, targets []string, opts ...Option) *similarity.Ma
1919
return r.Match
2020
}
2121

22-
// compare is the internal helper that scores two strings: it runs modifyStrAndCheck on both
// and returns that score when the exit flag is set; otherwise it returns o.cmp(s1, s2).
23-
func compare(s1, s2 string, o *option) float64 {
24-
if s, e := modifyStrAndCheck(o, &s1, &s2); e {
25-
return s
26-
}
27-
28-
return o.cmp(s1, s2)
29-
}
30-
31-
// modifyStrAndCheck is the pre-processing step (string modification and boundary checks):
// it passes both string pointers to modifyString, then returns the result of check on the pair.
32-
func modifyStrAndCheck(o *option, s1, s2 *string) (score float64, exit bool) {
33-
modifyString(o, s1)
34-
modifyString(o, s2)
35-
36-
return check(*s1, *s2)
37-
}
38-
39-
// findBestMatch (internal) records a similarity score for every entry of targets and
// returns the result whose Match is the highest-scoring entry.
40-
func findBestMatch(s string, targets []string, opts ...Option) *similarity.MatchResult {
41-
42-
var opt option
43-
opt.fillOption(opts...)
44-
45-
match := make([]*similarity.Match, 0, len(targets))
46-
bestIndex := 0
47-
for k, s2 := range targets {
48-
49-
score := compare(s, s2, &opt)
50-
51-
//fmt.Printf("score:%f(%s)(%s)\n", score, s, s2)
52-
match = append(match, &similarity.Match{S: s2, Score: score})
53-
54-
// The first entry is the initial best; there is nothing to compare it against yet.
if k == 0 {
55-
continue
56-
}
57-
58-
// Strictly greater: on equal scores the earlier target stays the best.
if score > match[bestIndex].Score {
59-
bestIndex = k
60-
}
61-
}
62-
63-
// NOTE(review): when targets is empty, match is empty and match[bestIndex] panics here.
return &similarity.MatchResult{AllResult: match, Match: match[bestIndex], BestIndex: bestIndex}
22+
// FindBestMatch returns the most similar string in targets together with its index position.
23+
func FindBestMatch(s string, targets []string, opts ...Option) *similarity.MatchResult {
24+
return findBestMatch(s, targets, opts...)
6425
}

strsim_priv.go

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package strsim
2+
3+
import "github.com/antlabs/strsim/similarity"
4+
5+
// 比较两个字符串内部函数
6+
func compare(s1, s2 string, o *option) float64 {
7+
if s, e := modifyStrAndCheck(o, &s1, &s2); e {
8+
return s
9+
}
10+
11+
return o.cmp(s1, s2)
12+
}
13+
14+
// 前处理主要涉及,修改字符串,和边界判断
15+
func modifyStrAndCheck(o *option, s1, s2 *string) (score float64, exit bool) {
16+
modifyString(o, s1)
17+
modifyString(o, s2)
18+
19+
return check(*s1, *s2)
20+
}
21+
22+
// 记录每个targets子串的相似度打分,并且返回相似度最高的那个字符串, 内部函数
23+
func findBestMatch(s string, targets []string, opts ...Option) *similarity.MatchResult {
24+
25+
var opt option
26+
opt.fillOption(opts...)
27+
28+
match := make([]*similarity.Match, 0, len(targets))
29+
bestIndex := 0
30+
for k, s2 := range targets {
31+
32+
score := compare(s, s2, &opt)
33+
34+
//fmt.Printf("score:%f(%s)(%s)\n", score, s, s2)
35+
match = append(match, &similarity.Match{S: s2, Score: score})
36+
37+
if k == 0 {
38+
continue
39+
}
40+
41+
if score > match[bestIndex].Score {
42+
bestIndex = k
43+
}
44+
}
45+
46+
return &similarity.MatchResult{AllResult: match, Match: match[bestIndex], BestIndex: bestIndex}
47+
}

strsim_test.go

+15
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,18 @@ func Test_FindBestMatchOne(t *testing.T) {
4545
}
4646
}
4747
}
48+
49+
func Test_FindBestMatch(t *testing.T) {
50+
for _, d := range []bestTest{
51+
{best: []string{"朝辞白帝彩云间", "千里江陵一日还", "两岸猿声啼不住", "轻舟已过万重山"}, key: "千里还", need: "千里江陵一日还"},
52+
} {
53+
for _, o := range []Option{
54+
DiceCoefficient(1),
55+
Jaro(),
56+
Default(),
57+
} {
58+
m := FindBestMatch(d.key, d.best, o)
59+
assert.Equal(t, m.Match.S, d.need)
60+
}
61+
}
62+
}

0 commit comments

Comments
 (0)