Реализации алгоритмов/Алгоритм Кнута — Морриса — Пратта
Внешний вид
Пусть ищется строка в строке . Построим строку , где символ — символ, не встречающийся ни в , ни в . Далее вычислим значения префикс-функции от строки и всех её префиксов. Теперь, если префикс-функция от префикса строки длины равна , где — длина , и , то в строке есть вхождение , начиная с позиции .
Function Pos ( const Needle, HayStack : string ) : Integer;
var
F: array of Integer;
k, i: integer;
begin
SetLength(F, 1+Length(Needle)); // Строки индексируются с 1, динамические массивы - с 0.
F[1] := 0;
k := 0;
for i := 2 to Length(Needle) do
begin
while (k > 0) and (Needle[k+1] <> Needle[i]) do
k := F[k];
if Needle[k+1] = Needle[i] then
Inc(k);
F[i] := k;
end;
k := 0;
for i := 1 to Length(HayStack) do
begin
while (k > 0) and (Needle[k+1] <> HayStack[i]) do
k := F[k];
if Needle[k+1] = HayStack[i] Then
Inc(k);
if k = Length(Needle) Then
begin
Result := i-length(Needle)+1;
//Здесь обрабатываются полученные данные после того как мы нашли подстроку,
//можно сделать результатом работы функции не только положение подстроки хотя это и есть основная задача этой функции
Break;
end;
end;
end;
#include <iostream>
#include <string>
#include <vector>
using namespace std;
string::size_type KMP(const string& S, int begin, const string& pattern)
{
vector<int> pf(pattern.length());
pf[0] = 0;
for (int k = 0, i = 1; i < pattern.length(); ++i)
{
while ((k > 0) && (pattern[i] != pattern[k]))
k = pf[k - 1];
if (pattern[i] == pattern[k])
k++;
pf[i] = k;
}
for (int k = 0, i = begin; i < S.length(); ++i)
{
while ((k > 0) && (pattern[k] != S[i]))
k = pf[k - 1];
if (pattern[k] == S[i])std::cout << k <<"\t" <<i+1 << "\t"; //для просмотра входов
k++;
if (k == pattern.length())
return (i - pattern.length() + 1); //либо продолжаем поиск следующих вхождений
}
return (string::npos);
std::cout << string::npos;
}
int main()
{
setlocale(LC_ALL, "ru");
string pattern = "Rаw";
string ss = "лилRjjruoлилRawилил";
int begin = 3;
KMP( ss, begin, pattern);
return 0;
}
int seek_substring_KMP (char s[], char p[])
{
int i, j, N, M;
N = strlen(s);
M = strlen(p);
// Динамический массив длины М
int *d = (int*)malloc(M * sizeof(int));
// Вычисление префикс-функции
d[0] = 0;
for(i = 1, j = 0; i < M; i++)
{
while(j > 0 && p[j] != p[i])
j = d[j-1];
if(p[j] == p[i])
j++;
d[i] = j;
}
// Поиск
for(i = 0, j = 0; i < N; i++)
{
while(j > 0 && p[j] != s[i])
j = d[j - 1];
if(p[j] == s[i])
j++;
if(j == M)
{
free(d);
return i - j + 1;
}
}
free(d);
return -1;
}
static int[] GetPrefix(string s)
{
int[] result = new int[s.Length];
result[0] = 0;
int index = 0;
for (int i = 1; i < s.Length; i++)
{
int k = result[i - 1];
while (s[k] != s[i] && k > 0)
{
k = result[k - 1];
}
if (s[k] == s[i])
{
result[i] = k + 1;
}
else
{
result[i] = 0;
}
}
return result;
}
static int FindSubstring(string pattern, string text)
{
int[] pf = GetPrefix(pattern);
int index = 0;
for (int i = 0; i < text.Length; i++)
{
while (index > 0 && pattern[index] != text[i]) { index = pf[index - 1]; }
if (pattern[index] == text[i]) index++;
if (index == pattern.Length)
{
return i - index + 1;
}
}
return -1;
}
public static int[] indexesOf(char[] pattern, char[] text)
{
int[] pfl = pfl(pattern);
int[] indexes = new int[text.length];
int size = 0;
int k = 0;
for (int i = 0; i < text.length; ++i)
{
while (pattern[k] != text[i] && k > 0)
{
k = pfl[k - 1];
}
if (pattern[k] == text[i])
{
k = k + 1;
if (k == pattern.length)
{
indexes[size] = i + 1 - k;
size += 1;
k = pfl[k - 1];
}
}
else
{
k = 0;
}
}
return Arrays.copyOfRange(indexes, 0, size);
}
public static int[] pfl(char[] text)
{
int[] pfl = new int[text.length];
pfl[0] = 0;
for (int i = 1; i < text.length; ++i)
{
int k = pfl[i - 1];
while (text[k] != text[i] && k > 0)
{
k = pfl[k - 1];
}
if (text[k] == text[i])
{
pfl[i] = k + 1;
}
else
{
pfl[i] = 0;
}
}
return pfl;
}
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as VM
import qualified Data.ByteString.Char8 as BS
type PrefixTable = V.Vector Int
makePrefixTable :: BS.ByteString -> PrefixTable
makePrefixTable s =
let n = BS.length s
in V.create $ do
p <- VM.new n
VM.set p 0
let loop i k | i == n = return ()
| k > 0 && (BS.index s i /= BS.index s k) = do
nk <- VM.read p (k - 1)
loop i nk
| otherwise = do
let nk = if BS.index s i == BS.index s k
then k + 1
else k
VM.write p i nk
loop (i + 1) nk
loop 1 0
return p
indexOf :: BS.ByteString -> BS.ByteString -> Int
indexOf needle haystack =
let pt = makePrefixTable needle
hLen = BS.length haystack
nLen = BS.length needle
loop i k | k == nLen = i - nLen
| i == hLen = -1
| k > 0 && (BS.index needle k /= BS.index haystack i) =
loop i $ pt V.! (k - 1)
| otherwise =
let nk = if BS.index needle k == BS.index haystack i
then k + 1
else k
in loop (i + 1) nk
in loop 0 0
Go
[править]func computeLPS(pat string) (lps []int) {
M := len(pat)
lps = make([]int, M) // lps[0] = 0
l := 0 // length of the previous longest prefix suffix
// the loop calculates lps[i] for i = 1 to M-1
for i := 1; i < M; i++ {
for {
if pat[i] == pat[l] {
l++
break
}
if l == 0 {
break
}
l = lps[l-1]
}
lps[i] = l
}
return lps
}
func KMPSearch(pat, txt string) {
M, N := len(pat), len(txt)
// Preprocess the pattern that will hold the longest prefix suffix values for pattern
lps := computeLPS(pat)
for i, j := 0, 0; i < N; i++ {
for {
if pat[j] == txt[i] {
j++
if j == M {
fmt.Printf("Found pattern at index %d \n", i-j+1)
j = lps[j-1]
}
break
}
if j > 0 {
j = lps[j-1]
} else {
break
}
}
}
}
Lua
[править]Для Lua версии 5.1 и выше.
-- Implementation of the Knuth Morris Pratt algorithm to find
-- substrings within strings.
-- Sean Brewer
-- Berry College CS 320
-- March 25, 2008
-- Generate the failure table using the substring and the length
-- of the substring
function generate_fail_table(substring,sublen)
comparisons = 0
fail = {0}
for i=2,sublen do
temp = fail[i - 1]
while temp > 0 and string.sub(substring,temp,temp) ~= string.sub(substring,i - 1,i - 1) do
comparisons = comparisons + 1
temp = fail[temp]
end
fail[i] = temp + 1
end
return {fail, comparisons + 1}
end
--The actual kmp algorithm which takes
--the substring and text as arguments
function kmp_algorithm(substring,text)
--starting positions...
subloc = 1
textloc = 1
--get the length of substring and text..
sublen = string.len(substring)
textlen = string.len(text)
--generate the fail links...
failure = generate_fail_table(substring,sublen)
--store failure comparisons
comparisons = failure[2]
--store fail table
fail = failure[1]
--the main loop..
while textloc <= textlen and subloc <= sublen do
if subloc == 0 or string.sub(text,textloc,textloc) == string.sub(substring,subloc,subloc) then
comparisons = comparisons + 1
textloc = textloc + 1
subloc = subloc + 1
else --no match found so follow fail link
subloc = fail[subloc]
end
end
if subloc > sublen then
return {textloc - sublen, comparisons}
else
return {0, comparisons}
end
end
Python
[править]# Knuth-Morris-Pratt string matching
# David Eppstein, UC Irvine, 1 Mar 2002
#from http://code.activestate.com/recipes/117214/
def KnuthMorrisPratt(text, pattern):
'''Yields all starting positions of copies of the pattern in the text.
Calling conventions are similar to string.find, but its arguments can be
lists or iterators, not just strings, it returns all matches, not just
the first one, and it does not need the whole text in memory at once.
Whenever it yields, it will have read the text exactly up to and including
the match that caused the yield.'''
# allow indexing into pattern and protect against change during yield
pattern = list(pattern)
# build table of shift amounts
shifts = [1] * (len(pattern) + 1)
shift = 1
for pos in range(len(pattern)):
while shift <= pos and pattern[pos] != pattern[pos-shift]:
shift += shifts[pos-shift]
shifts[pos+1] = shift
# do the actual search
startPos = 0
matchLen = 0
for c in text:
while matchLen == len(pattern) or \
matchLen >= 0 and pattern[matchLen] != c:
startPos += shifts[matchLen]
matchLen -= shifts[matchLen]
matchLen += 1
if matchLen == len(pattern):
yield startPos
PHP
[править]<?php declare(strict_types = 1);
function kmp(string $str = '', string $substr = ''): int {
$n = mb_strlen($str);
$m = mb_strlen($substr);
// prefix
$d[0] = 0;
for ($i = 1, $j = 0; $i < $m; $i++) {
while($j > 0 && $substr[$j] != $substr[$i]) {
$j = $d[$j - 1];
}
if($substr[$j] == $substr[$i]) {
$j++;
}
$d[$i] = $j;
}
// search
for($i = 0, $j = 0; $i < $n; $i++) {
while($j > 0 && $substr[$j] != $str[$i]) {
$j = $d[$j - 1];
}
if($substr[$j] == $str[$i]) {
$j++;
}
if($j == $m) {
return $i - $j + 1;
}
}
return -1;
}