// Implements the Knuth-Morris-Pratt substring matching // algorithm: CLR 34.4 // Returns the index of pattern within text // Returns -1 if pattern doesn't occur anywhere in text match(text:string, pattern:string):int // Like match above, but optimized for doing multiple matches // per piece of text // Pass the result of begin repeatedly to match_next along // the previous index at which the pattern was found // to retrive more matches begin(text:string, pattern:string):array[int] next(text:string, pattern:string, precomputed:array[int], start_pos:int):int
//algorithm from CLR 34.4 match(t:string, p:string):int = matcher(t, p, prefix(p), 1) begin(t:string, p:string):array[int] = prefix(p) next(t:string, p:string, pi:array[int], i:int):int = matcher(t, p, pi, i+1) matcher(t:string, p:string, pi:array[int], i_:int):int = ( i:int = i_; n:int = length t; m:int = length p; q:int = 0; while (i <= n) ( while (q > 0 & p[q] != t[i-1]) ( q = pi[q-1]); if (p[q] == t[i-1]) q++; if (q == m) ( return i-m+1; ); i++; ); return -1; ) prefix(p:string):array[int] = ( m:int = length p; pi:array[int] = new int[m](0); k:int = 0; q:int = 2; while (q <= m) ( while (k > 0 & p[k] != p[q-1]) ( k = pi[k-1] ); if (p[k] == p[q-1]) k++; pi[q-1] = k; q++; ); pi )
uses file.FileInput, file.read, io.InputStream, io.printi, io.print, conv.itos, match_next = kmp.next, match_begin = kmp.begin main(args:array[string]):int = ( s:string = ""; if (length args < 1) ( print("usage: kmp_test <file> <string>\N"); return 1 ); p:string = args[1]; in: FileInput = read(args[0]); if (in == null) ( print("Couldn't open " + args[0] + "\N"); return 1 ); while (! in.eof()) ( s = s + in.readln(); ); precomputed:array[int] = match_begin(s, p); i:int = 0; j:int = 0; while (i != -1) ( i = match_next(s,p,precomputed, i); if (i != -1) ( print("Match found at offset " + itos(i) + ".\n"); j++; ) ); print(itos(j) + " matches found.\N"); 0 )