Skip to content

Amazing String Algorithms

最小覆盖子串

求最小覆盖子串。

  • KMP

    定理:最小覆盖子串长度为N-next[N]

    最小覆盖子串即前N-Next[N]个字符构成的子串。

Variants

  • 二维最小覆盖矩阵面积(挤奶网络)

    愚蠢的char**传入后无法二维下标访问,所以只能写两套函数。

    #include <iostream>
    #include <cstring>
    #include <string>
    #include <algorithm>
    
    using namespace std;
    
    const int maxr = 10005;
    const int maxc = 80;
    int R, C;
    char mat[maxr][maxc];
    char tmat[maxc][maxr];
    
    bool coleq(int i, int j) {
      for (int k = 0; k < R; k++)
          if (mat[k][i] != mat[k][j]) return false;
      return true;
    }
    
    bool roweq(int i, int j) {
      for (int k = 0; k < C; k++)
          if (tmat[k][i] != tmat[k][j]) return false;
      return true;
    }
    
    int colmncov() {
      vector<int> next(C + 1, 0);
      next[0] = -1;
      int i = 0, k = -1;
      while (i < C) {
          while (k >= 0 && !coleq(i, k)) k = next[k];
          i++, k++;
          next[i] = k;
      }
      return C - next[C];
    }
    
    int rowmncov() {
      vector<int> next(R + 1, 0);
      next[0] = -1;
      int i = 0, k = -1;
      while (i < R) {
          while (k >= 0 && !roweq(i, k)) k = next[k];
          i++, k++;
          next[i] = k;
      }
      return R - next[R];
    }
    
    int main() {
      cin >> R >> C;
      memset(mat, '\0', sizeof(mat));
      memset(tmat, '\0', sizeof(tmat));
      for (int i = 0; i < R; i++) {
          for (int j = 0; j < C; j++) {
              cin >> mat[i][j];
              tmat[j][i] = mat[i][j];
          }
      }
      cout << rowmncov() * colmncov() << endl;
    }
    

最小重复子串

判断是否存在最小重复子串。

  • KMP based

    class Solution {
    public:
        bool repeatedSubstringPattern(string s) {
            int len = s.size();
            s += "$";
            vector<int> next(len+1, 0);
            next[0] = -1;
            int i=0, k=-1;
            while(i<len){
                while(k>=0 && s[i] != s[k]) k = next[k];
                i++, k++;
                next[i] = k;
            }
            // minimum cover substring
            int cov = len - next[len];
            return cov<len && len%cov==0;
        }
    };
    
  • Trick

    Trick is trick because you will never know why trick is trick.

    class Solution {
    public:
        bool repeatedSubstringPattern(string s) {
            return (s+s).substr(1,2*s.size()-2).find(s)!=-1;
        }
    };
    

最长回文子串

  • Manacher (c++写法巨大烦人)

    #include <iostream>
    #include <string>
    #include <cstring>
    #include <algorithm>
    
    using namespace std;
    
    const int maxl = 105;
    int r[maxl * 2];
    
    string manacher(string s) {
      string ss = "^#";
      for (int i = 0; i < s.size(); i++) ss += s[i], ss += '#';
      ss += '$';
      memset(r, 0, sizeof(r));
      int p = 0, l = 0;
      for (int i = 0; i < ss.size(); i++) {
          r[i] = l > i ? min(r[2 * p - i], l - i) : 1;
          while (ss[i + r[i]] == ss[i - r[i]]) r[i]++;
          if (i + r[i] > l) {
              l = i + r[i];
              p = i;
          }
      }
      int idx = 0;
      for (int i = 1; i < ss.size(); i++) {
          if (r[i] > r[idx]) {
              idx = i;
          }
      }
      string res = "";
      for (int i = idx - r[idx] + 1; i < idx + r[idx]; i++) {
          if (ss[i] != '#') res += ss[i];
      }
      return res;
    }
    
    int main() {
      string s;
      while (cin >> s) {
          cout << manacher(s) << endl;
      }
    }
    

最长公共子串

求N个字符串的最长公共子串(每个字符串最长为L)

  • KMP + Binary Search,\(O(LlgL*N^2)\)
  • Suffix Array + Binary Search,\(O(lgL*N^2)\)

最长公共子序列

求2个字符串的最长公共子序列。

  • DP

最长上升子序列

  • DP

Distinct Subsequences

求一个字符串的Unique的子序列的个数。

  • DP