刚学完NLP领域的经典算法 Minimum Edit Distance, 也对动态规划有了初步的认识。这里贴出我用Java实现的最短编辑距离的代码。

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
public class MinEditDistance {
private String source;
private String target;

public static final int SHOW_PROCESS = 1;
public static final int NO_SHOW_PROCESS = 0;

public MinEditDistance(String source,String target) {
// TODO Auto-generated constructor stub
this.source = source;
this.target = target;
}

// 对外暴露的接口
public int getMinEditDistance(){
return calculate(NO_SHOW_PROCESS);
}

// 显示计算的过程,将矩阵打印出来
public int showProcess(){
return calculate(SHOW_PROCESS);
}

//计算
private int calculate(int parameter){

int sourceLen = source.length();
int targetLen = target.length();

//矩阵
int[][] distance = new int[sourceLen + 1][targetLen + 1];

// 初始化
distance[0][0] = 0;
for(int i = 1; i <= targetLen; i++){
distance[0][i] = distance[0][i-1] + insertCost(target.charAt(i-1));
}
for(int i = 1; i <= sourceLen; i++){
distance[i][0] = distance[i-1][0] + deleteCost(source.charAt(i-1));
}

for(int i = 1; i <= sourceLen; i++){
for(int j = 1; j <= targetLen; j++){
distance[i][j] = min(distance[i-1][j] + deleteCost(source.charAt(i-1)),
distance[i][j-1] + insertCost(target.charAt(j-1)),
distance[i-1][j-1] + substanceCost(source.charAt(i-1),target.charAt(j-1)));
}
}


// System.out.println(distance[sourceLen][targetLen]);

if(parameter == SHOW_PROCESS){ // 显示过程

for(int i = 0; i <= sourceLen; i++){
for(int j = 0; j <= targetLen; j++){
System.out.print(distance[i][j] + " ");
}
System.out.println();
}
}

return distance[sourceLen][targetLen];
}


private int min(int i, int j, int k) {
// TODO Auto-generated method stub

int minNum = i;
if(j < minNum)
minNum = j;
if(k < minNum)
minNum = k;

return minNum;
}

private int substanceCost(char charAt, char charAt2) {
// TODO Auto-generated method stub

if(charAt == charAt2)
return 0;
else
return 2;
}

private int deleteCost(char charAt) {
// TODO Auto-generated method stub
return 1;
}

private int insertCost(char charAt) {
// TODO Auto-generated method stub
return 1;
}
}

运行示例

运行代码

1
2
3
4
5
6
7
8
9
10
11

String source = "INTENTION";
String target = "EXECUTION";

MinEditDistance min = new MinEditDistance(source, target);

//显示计算过程
min.showProcess()

//显示计算结果
System.out.println(source + " 到 " + target + " 的最短编辑距离为: " + min.getMinEditDistance());

计算结果

1
2
3
4
5
6
7
8
9
10
11
12
0    1    2    3    4    5    6    7    8    9
1 2 3 4 5 6 7 6 7 8
2 3 4 5 6 7 8 7 8 7
3 4 5 6 7 8 7 8 9 8
4 3 4 5 6 7 8 9 10 9
5 4 5 6 7 8 9 10 11 10
6 5 6 7 8 9 8 9 10 11
7 6 7 8 9 10 9 8 9 10
8 7 8 9 10 11 10 9 8 9
9 8 9 10 11 12 11 10 9 8

INTENTION 到 EXECUTION 的最短编辑距离为: 8