当前位置：首页 > news >正文

串匹配问题的三种算法

news 2025/8/24 16:41:22

BF算法

#include <iostream>  
#include <string>  using namespace std;  // 暴力匹配算法  
int bruteForceMatch(const string& s, const string& p) {  int n = s.length();  int m = p.length();  for (int i = 0; i <= n - m; ++i) {  int j;  for (j = 0; j < m; ++j) {  if (s[i + j] != p[j])  break;  }  if (j == m)  return i; // 匹配成功  }  return -1; // 匹配失败  
}  int main() {  string s = "ABABDABACDABABCABAB";  string p = "ABABCABAB";  cout << "Brute-Force Match Result: " << bruteForceMatch(s, p) << endl;  return 0;  
}

KMP算法

#include <iostream>  
#include <string>  
#include <vector>  using namespace std;  // 计算部分匹配表  
vector<int> computeLPSArray(const string& p, int M) {  vector<int> lps(M, 0);  int len = 0;  int i = 1;  while (i < M) {  if (p[i] == p[len]) {  len++;  lps[i] = len;  i++;  } else { // (p[i] != p[len])  if (len != 0) {  len = lps[len - 1];  } else { // if (len == 0)  lps[i] = len;  i++;  }  }  }  return lps;  
}  // KMP算法  
int KMPSearch(const string& s, const string& p) {  int n = s.length();  int m = p.length();  vector<int> lps = computeLPSArray(p, m);  int i = 0; // s的索引  int j = 0; // p的索引  while (i < n) {  if (p[j] == s[i]) {  j++;  i++;  }  if (j == m) {  return i - j; // 匹配成功，返回模式串在主串中的起始位置  j = lps[j - 1];  }  // 不匹配时  else if (i < n && p[j] != s[i]) {  // 不为0则回到之前某个位置  if (j != 0)  j = lps[j - 1];  else // j == 0  i = i + 1;  }  }  return -1; // 匹配失败  
}  int main() {  string s = "ABABDABACDABABCABAB";  string p = "ABABCABAB";  cout << "KMP Match Result: " << KMPSearch(s, p) << endl;  return 0;  
}

BM算法

#include <iostream>
#include <vector>
#include <string>using namespace std;class BoyerMoore {
public:BoyerMoore(const string& pattern) {this->pattern = pattern; // 存储模式串this->m = pattern.length(); // 模式串长度buildBadChar(); // 构建坏字符表buildGoodSuffix(); // 构建好后缀表}// 在文本中搜索模式串void search(const string& text) {int n = text.length(); // 文本长度int s = 0; // 当前文本的起始位置while (s <= n - m) { // 直到文本末尾int j = m - 1; // 从模式串末尾开始比较// 比较模式串与文本while (j >= 0 && pattern[j] == text[s + j]) {j--;}// 如果找到匹配if (j < 0) {cout << "Pattern found at index " << s << endl;// 移动模式串s += (s + m < n) ? m - badChar[text[s + m]] : 1;} else {// 移动模式串s += max(1, j - badChar[text[s + j]]);}}}private:string pattern; // 模式串int m; // 模式串长度vector<int> badChar; // 坏字符表vector<int> goodSuffix; // 好后缀表// 构建坏字符表void buildBadChar() {badChar.assign(256, -1); // 初始化为-1for (int i = 0; i < m; i++) {badChar[(int)pattern[i]] = i; // 更新坏字符的位置}}// 构建好后缀表void buildGoodSuffix() {goodSuffix.assign(m + 1, m); // 初始化vector<int> z(m); // z数组int last = m - 1;for (int i = m - 1; i >= 0; i--) {if (i == last) {while (last >= 0 && pattern[last] == pattern[last - (m - 1 - i)]) {last--;}goodSuffix[m - 1 - i] = last + 1; // 更新好后缀} else {goodSuffix[m - 1 - i] = last - i; // 更新}}}
};int main() {string text = "ABAAABCDABCDE"; // 文本string pattern = "ABC"; // 模式串BoyerMoore bm(pattern); // 创建BM对象bm.search(text); // 搜索模式串return 0;
}

暴力匹配算法（BF算法）：最坏情况时间复杂度：O(m * n)，其中m是模式串长度，n是文本长度。最坏情况下，需要逐个字符比较。
KMP算法：最坏情况时间复杂度：O(m + n)。KMP通过预处理模式串生成部分匹配表，避免了不必要的回溯，从而提高了效率。
Boyer-Moore算法（BM算法）：最坏情况时间复杂度：O(m * n)，但在大多数实际情况中，平均时间复杂度是O(n/m)。BM算法通过坏字符和好后缀启发式规则有效减少比较次数，通常在长模式串和长文本中表现优越。

总结：

BF：简单但效率低，尤其在长文本和模式串时。

KMP：高效，适合任何情况。

BM：在实际应用中通常更快，尤其是长模式串时。

查看全文

http://www.mrgr.cn/news/37858.html