--- tags: codebook --- {%hackmd theme-dark %} # KMP algorithm Knuth–Morris–Pratt algorithm ## 核心概念 找出在匹配失敗後可以直接跳轉的地方 > 次長共同前綴後綴 ## 範例 在字串$String$中找到規則$Rule$ ``` string:12345612123125789123125 rule:123125 current:^ success:^ string:12345612123125789123125 rule:123125 current: ^ success:^^ string:12345612123125789123125 rule:123125 current: ^ success:^^^ string:12345612123125789123125 rule: 123125 current: ^ success: string:12345612123125789123125 rule: 123125 current: ^ success: string:12345612123125789123125 rule: 123125 current: ^ success: string:12345612123125789123125 rule: 123125 current: ^ success: ^ string:12345612123125789123125 rule: 123125 current: ^ success: ^^ string:12345612123125789123125 rule: 123125 current: ^ success: ^ string:12345612123125789123125 rule: 123125 current: ^ success: ^^ string:12345612123125789123125 rule: 123125 current: ^ success: ^^^ string:12345612123125789123125 rule: 123125 current: ^ success: ^^^^ string:12345612123125789123125 rule: 123125 current: ^ success: ^^^^^ string:12345612123125789123125 rule: 123125 current: ^ success: ^^^^^^ string:12345612123125789123125 rule: 123125 current: ^ success: string:12345612123125789123125 rule: 123125 current: ^ success: string:12345612123125789123125 rule: 123125 current: ^ success: string:12345612123125789123125 rule: 123125 current: ^ success: ^ string:12345612123125789123125 rule: 123125 current: ^ success: ^^ string:12345612123125789123125 rule: 123125 current: ^ success: ^^^ string:12345612123125789123125 rule: 123125 current: ^ success: ^^^^ string:12345612123125789123125 rule: 123125 current: ^ success: ^^^^^ string:12345612123125789123125 rule: 123125 current: ^ success: ^^^^^^ positions : 8 17 is matched. ``` 製作出 failure function ```cpp= for (i = 1, j = string::npos; i < rule.size(); i++) { while (j != string::npos && rule[j + 1] != rule[i]) j = fail[j]; if (rule[j + 1] == rule[i]) j++; fail[i] = j; } ``` ## 範例程式碼 ```cpp= #include<iostream> #include<vector> using namespace std; struct KMP { const string pattern; vector<size_t> fail; vector<size_t> operator()(const string& str) { vector<size_t> v; for (size_t i = 0, j = string::npos; i < str.size(); i++) { while (j != string::npos && str[i] != pattern[j + 1]) j = fail[j]; j += str[i] == pattern[j + 1]; if (j == pattern.size() - 1) v.push_back(i - pattern.size() + 1); } return v; } KMP(const string& str): pattern(str), fail(str.size() + 1, string::npos) { for (size_t i = 2, j = string::npos; i < pattern.size(); i++) { while (j != string::npos && pattern[i] != pattern[j + 1]) j = fail[j]; j += pattern[i] == pattern[j + 1]; fail[i] = j; } } }; ostream& operator<<(ostream& os, const vector<auto>& v) { for (const auto& i : v) os << i << ' '; return os; } int main() { KMP t("aba"); cout << t("aaaababaa") << endl; return 0; } ``` ## process ```cpp= #include<iostream> #include<iomanip> #include<vector> #include<string> #include<algorithm> using namespace std; template<typename T> ostream& operator<<(ostream& os, const vector<T>& v) { for (const auto& i : v) os << (i == string::npos ? -1 : int(i)) << ' '; return os; } struct KMP { vector<size_t> fail; const string rule; vector<size_t> operator()(const string& str) { vector<size_t> appeared; for (size_t i = 0, j = string::npos; i != str.size(); i++) { while (j != string::npos && rule[j + 1] != str[i]) j = fail[j]; j += (rule[j + 1] == str[i]); if (j == rule.size() - 1) appeared.push_back(i - rule.size() + 1); cout << fail << endl; cout << " string:" << str << endl; cout << " rule:" << string(i - j, ' ') << rule << endl; string tmp; cout << " current:" << (tmp.assign(str.size() + 1, ' '), tmp[i] = '^', tmp) << ' ' << i << endl; if (tmp.assign(str.size() + 1, ' '), j != string::npos) fill_n(tmp.begin() + i - j, j + 1, '^'); cout << " success:" << tmp << ' ' << j << endl << endl; } if (appeared.empty()) appeared.push_back(string::npos); return appeared; } KMP(const string& str): fail(str.size(), string::npos), rule(str) { for (size_t i = 1, j = string::npos; i != rule.size(); i++) { while (j != string::npos && rule[j + 1] != rule[i]) j = fail[j]; j += rule[j + 1] == rule[i]; fail[i] = j; } } }; int main() { string str; KMP s("123125"); cout << s("123456789123125") << "is matched." << endl; return 0; } ```