kmp.ii:
// Implements the Knuth-Morris-Pratt substring matching
// algorithm: CLR 34.4
// Returns the index of pattern within text
// Returns -1 if pattern doesn't occur anywhere in text
match(text:string, pattern:string):int
// Like match above, but optimized for doing multiple matches
// per piece of text
// Pass the result of begin repeatedly to match_next along
// the previous index at which the pattern was found
// to retrive more matches
begin(text:string, pattern:string):array[int]
next(text:string, pattern:string,
precomputed:array[int], start_pos:int):int
kmp.im:
//algorithm from CLR 34.4
match(t:string, p:string):int =
matcher(t, p, prefix(p), 1)
begin(t:string, p:string):array[int] =
prefix(p)
next(t:string, p:string, pi:array[int], i:int):int =
matcher(t, p, pi, i+1)
matcher(t:string, p:string, pi:array[int], i_:int):int = (
i:int = i_;
n:int = length t;
m:int = length p;
q:int = 0;
while (i <= n) (
while (q > 0 & p[q] != t[i-1]) ( q = pi[q-1]);
if (p[q] == t[i-1])
q++;
if (q == m) (
return i-m+1;
);
i++;
);
return -1;
)
prefix(p:string):array[int] = (
m:int = length p;
pi:array[int] = new int[m](0);
k:int = 0;
q:int = 2;
while (q <= m) (
while (k > 0 & p[k] != p[q-1]) ( k = pi[k-1] );
if (p[k] == p[q-1])
k++;
pi[q-1] = k;
q++;
);
pi
)
kmp_test.im:
uses file.FileInput, file.read,
io.InputStream, io.printi, io.print,
conv.itos,
match_next = kmp.next, match_begin = kmp.begin
main(args:array[string]):int = (
s:string = "";
if (length args < 1) (
print("usage: kmp_test <file> <string>\N");
return 1
);
p:string = args[1];
in: FileInput = read(args[0]);
if (in == null) (
print("Couldn't open " + args[0] + "\N");
return 1
);
while (! in.eof()) (
s = s + in.readln();
);
precomputed:array[int] = match_begin(s, p);
i:int = 0;
j:int = 0;
while (i != -1) (
i = match_next(s,p,precomputed, i);
if (i != -1) (
print("Match found at offset " + itos(i) + ".\n");
j++;
)
);
print(itos(j) + " matches found.\N");
0
)