Dynamic Programming - String Similarity

Posted on by Kenny Cason
tags = [ dynamic programming, java, string similarity ]

Java example using Dynamic Programming for measuring String Similarity: For more information regarding Dynamic Programming check here.

DynamicStringSimularity.java

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
package lib.algo.matching;

public class DynamicStringSimularity {

    private int scoreMatch = 1;

    private int scoreMismatch = -1;

    private int scoreGap = -1;

    public boolean print = false;

    public DynamicStringSimularity() {}

    public DynamicStringSimularity(int scoreMatch, int scoreMismatch, int scoreGap) {
        this.scoreMatch = scoreMatch;
        this.scoreMismatch = scoreMismatch;
        this.scoreGap = scoreGap;
    }

    /**
     * Dynamically compare two strings
     * @param seq1
     * @param seq2
     * @return 0.0 to 1.0,
     *         0.0 = 100% dissimilar
     *         1.0 = 100% similar
     */
    public double compare(String seq1, String seq2) {
        seq1 = seq1.toLowerCase();
        seq2 = seq2.toLowerCase();
        double score = Double.MIN_VALUE;

        // initialize
        int[][] alignmentMatrix = new int[seq2.length() + 1][seq1.length() + 1];
        // fill in initial values
        for(int i = 0; i <= seq1.length(); i++) {
            alignmentMatrix[0][i] = 0;
        }
        for(int i = 0; i <= seq2.length(); i++) {
            alignmentMatrix[i][0] = 0;
        }
        // calculate values
        for(int j = 1; j <= seq2.length(); j++) {
            for(int i = 1; i <= seq1.length(); i++) {
                // base score
                int matchScore = alignmentMatrix[j - 1][i - 1];
                int gapScore1 = alignmentMatrix[j - 1][i];
                int gapScore2 = alignmentMatrix[j][i - 1];
                // is there a match? diagonals
                if(seq1.charAt(i - 1) == seq2.charAt(j - 1)) {
                    matchScore += scoreMatch;
                } else {
                    matchScore += scoreMismatch;
                }
                // get gap score
                gapScore1 += scoreGap;
                gapScore2 += scoreGap;
                alignmentMatrix[j][i] = max(matchScore, gapScore1, gapScore2, 0);
                if(print) {
                    System.out.print(alignmentMatrix[j][i] + "\t");
                }
            }
            if(print) {
                System.out.println();
            }
        }        
        score = getMaxValue(alignmentMatrix);
        // normalize score
        score = score / max(seq1.length(), seq2.length());
        return score;
    }

    private int getMaxValue(int[][] alignmentMatrix) {
        int maxValue = Integer.MIN_VALUE;
        for(int j = 0; j < alignmentMatrix.length; j++) {
            for(int i = 0; i < alignmentMatrix[0].length; i++) {
                if(alignmentMatrix[j][i] > maxValue) {
                    maxValue = alignmentMatrix[j][i];
                }
            }
        }
        return maxValue;
    }

    private int max(int a, int b) {
        return (a > b) ? a : b;
    }

    private int max(int a, int b, int c, int d) {
        return max(max(max(a, b), c), d);
    }

}

DynamicStringSimularityTest.java

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
package lib.algo.matching;

import org.junit.Test;

public class DynamicStringSimularityTest {

    @Test
    public void test() {
        runTest("kenny", "kenny");
        runTest("kenny", "abcde");
        runTest("aaaaa", "bbbbb");
        runTest("aaabbb", "bbbaaa");
        runTest("aaabbb", "ababab");
        runTest("Hi my name is kenny, and this is a test", "Hi my name is benny, and this is a test");
    }

    private void runTest(String s1, String s2) {
        System.out.println("s1: " + s1 + ", s2: " + s2);
        DynamicStringSimularity dss = new DynamicStringSimularity();
        dss.print = true;
        System.out.println("score: " +  dss.compare(s1, s2));
    }

}
comments powered by Disqus