Skip to content

Commit 387d650

Browse files
authored
Merge pull request #259 from Minimini20/master
Added KMP Pattern Matching Algorithm
2 parents 091eb8e + f2f8632 commit 387d650

File tree

1 file changed

+67
-0
lines changed

1 file changed

+67
-0
lines changed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
//Description
2+
//Knuth Morris Pratt Pattern Searching Algorithm
3+
//The idea behind KMP’s algorithm is: whenever we detect a mismatch after some matches, we already know some of the characters in the text of the next window.
4+
//We take advantage of this information to avoid matching the characters that we know will anyway match.
5+
//This algorithm is an optimization of the naive string matching technique, which runs in O(text.length*pattern.length) time.
6+
//
7+
//KMP algorithm uses the concept of LPS(Longest Proper Prefix which is also a Proper Suffix)
8+
//Prefix of abcd - a, ab, abc, abcd
9+
//Proper Prefix of abcd - All prefixes except full string - a, ab, abc
10+
//The first step is to preprocess the string and build an lps array, where lps[i] stores the length of the longest proper prefix of the substring ending at index i that is also a suffix of that substring.
11+
//Also, there can be overlapping between the prefixes and suffixes while calculating the longest length.
12+
//lps[0] = 0 because the string up to index 0 is a single character, which has no proper prefix or suffix, so the longest length is 0.
13+
//For example:
14+
//text - aabaacaabaad
15+
//lps[] - 010120123450
16+
17+
18+
import java.util.Scanner;
19+
20+
/**
 * Knuth-Morris-Pratt pattern matching.
 *
 * <p>Counts how many times a pattern occurs in a text (overlapping occurrences
 * included). The LPS table is built from the pattern alone and the text is then
 * scanned once, so no separator character is required and any character may
 * appear in either string.
 */
public class KMP_Pattern_Matching {

    /**
     * Builds the LPS table of a string.
     *
     * @param s the string to preprocess
     * @return an array where element {@code i} is the length of the longest proper
     *         prefix of {@code s[0..i]} that is also a suffix of {@code s[0..i]};
     *         an empty array when {@code s} is empty
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static int[] LPS(String s) {
        int[] lps = new int[s.length()];
        int i = 1;
        int len = 0; // length of the prefix currently being extended
        while (i < s.length()) {
            if (s.charAt(i) == s.charAt(len)) {
                // Characters match: the border ending at i is one longer than before.
                len++;
                lps[i] = len;
                i++;
            } else if (len > 0) {
                // Mismatch: fall back to the next shorter border and retry the same i.
                len = lps[len - 1];
            } else {
                // No border can be extended; lps[i] keeps its default value 0.
                i++;
            }
        }
        return lps;
    }

    /**
     * Counts the occurrences of {@code pattern} in {@code text}, overlapping
     * occurrences included.
     *
     * <p>An empty pattern is considered to match at every position of the text,
     * i.e. {@code text.length() + 1} times.
     *
     * @param text    the text to search in
     * @param pattern the pattern to search for
     * @return the number of positions in {@code text} at which {@code pattern} starts
     * @throws NullPointerException if either argument is {@code null}
     */
    public static int countOccurrences(String text, String pattern) {
        int patternLength = pattern.length();
        int textLength = text.length();
        if (patternLength == 0) {
            return textLength + 1;
        }
        if (patternLength > textLength) {
            return 0;
        }

        int[] lps = LPS(pattern);
        int count = 0;
        int matched = 0; // number of pattern characters matched so far
        for (int i = 0; i < textLength; i++) {
            char current = text.charAt(i);
            // On mismatch, reuse the longest border of the matched part instead of
            // restarting from the beginning of the pattern.
            while (matched > 0 && current != pattern.charAt(matched)) {
                matched = lps[matched - 1];
            }
            if (current == pattern.charAt(matched)) {
                matched++;
            }
            if (matched == patternLength) {
                count++;
                // Continue from the longest border so overlapping matches are found.
                matched = lps[matched - 1];
            }
        }
        return count;
    }

    /**
     * Prints how many times {@code pattern} occurs in {@code text}.
     *
     * @param text    the text to search in
     * @param pattern the pattern to search for
     * @throws NullPointerException if either argument is {@code null}
     */
    public static void KMP(String text, String pattern) {
        int cnt = countOccurrences(text, pattern);
        System.out.println("Pattern is found in text " + cnt + " times");
    }

    /**
     * Reads the text from the first line of standard input and the pattern from
     * the second line, then prints the number of occurrences.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // try-with-resources closes the Scanner when reading is finished.
        try (Scanner scn = new Scanner(System.in)) {
            if (!scn.hasNextLine()) {
                System.err.println("Expected the text on the first input line");
                return;
            }
            String text = scn.nextLine();
            if (!scn.hasNextLine()) {
                System.err.println("Expected the pattern on the second input line");
                return;
            }
            String pattern = scn.nextLine();
            KMP(text, pattern);
        }
    }
}
66+
67+
//Overall Time Complexity of KMP is O(text.length+pattern.length)

0 commit comments

Comments
 (0)