acm模板-字符串

发布于 2021-10-18  778 次阅读


字符串

Hash->12种hash算法,有两到三种比较方便处理前缀hash和(BKDR和DJB已经写出)

typedef unsigned long long ull;
const ull p = 131;
const int N = 2e6 + 10;
ull BKDR_hsh[N];
ull DJB_hsh[N];
ull BKDR_hash(string s) {
  ull tmp = 0;
  for (int i = 0; i < s.length(); i++) {
    BKDR_hsh[i] = tmp * p + s[i] - 'a' + 1;
    tmp = BKDR_hsh[i];
  }
  return BKDR_hsh[s.length() - 1];
}  //字符串hash + 前缀和hash处理
ull AP_hash(string s) {
  ull hsh = 0;
  for (int i = 0; i < s.length(); i++) {
    if (i & 1)
      hsh ^= ((hsh << 11) ^ (s[i] - 'a' + 1) ^ (hsh >> 5));
    else
      hsh ^= ((hsh << 7) ^ (s[i] - 'a' + 1) ^ (hsh >> 3));
  }
  return hsh;
}
ull DJB_hash(string s) {
  ull hsh = 5381;
  for (int i = 0; i < s.length(); i++) {
    hsh = hsh * 32 + s[i] - 'a' + 1;
    DJB_hsh[i] = hsh;
  }
  return DJB_hsh[s.length() - 1];
}
ull JS_hash(const string& s) {
  ull hsh = 1315423911;
  for (char i : s) hsh ^= ((hsh << 5) + (i - 'a' + 1) + (hsh >> 2));
  return hsh;
}
ull RS_hash(const string& s) {
  ull b = 378551;
  ull a = 63689;
  ull hsh = 0;
  for (char i : s) {
    hsh = hsh * a + i - 'a' + 1;
    a *= b;
  }
  return hsh;
}
ull SDB_hash(const string& s) {
  ull hsh = 0;
  for (char i : s)
    // equivalent to: hash = 65599*hash + (*str++);
    hsh = (i - 'a' + 1) + (hsh << 6) + (hsh << 16) - hsh;
  return hsh;
}
ull PJWHash(string s) {
  ull BitsInUnignedInt = sizeof(ull) * 8;
  ull ThreeQuarters = (ull)((BitsInUnignedInt * 3) / 4);
  ull OneEighth = (ull)(BitsInUnignedInt / 8);
  ull HighBits = (ull)(0xFFFFFFFF) << (BitsInUnignedInt - OneEighth);
  ull hsh = 0;
  ull test;
  for (char i : s) {
    hsh = (hsh << OneEighth) + i - 'a' + 1;
    if ((test = hsh & HighBits) != 0)
      hsh = ((hsh ^ (test >> ThreeQuarters)) & (~HighBits));
  }
  return hsh;
}
ull ELFHash(string s) {
  ull hsh = 0;
  ull x;
  for (char i : s) {
    hsh = (hsh << 4) + i - 'a' + 1;
    if ((x = hsh & 0xF0000000L) != 0) {
      hsh ^= (x >> 24);
      hsh &= ~x;
    }
  }
  return hsh;
}
ull DEKhash(string s) {
  ull hsh = s.length();
  for (char i : s) hsh = (hsh << 5) ^ (hsh >> 27) ^ (ull)(i - 'a' + 1);
  return hsh;
}
ull BPhash(string s) {
  ull hsh = s.length();
  for (char i : s) {
    hsh = (hsh << 7) ^ (ull)(i - 'a' + 1);
  }
  return hsh;
}
ull FNVhash(string s) {
  ull fnvprime = 0x811C9DC5;
  ull hsh = 0;
  for (char i : s) {
    hsh *= fnvprime;
    hsh ^= (ull)(i - 'a' + 1);
  }
  return hsh;
}
ull JAVAhash(string s) {
  ull hsh = 0;
  for (char i : s) hsh = hsh * 31 + (ull)(i - 'a' + 1);
  return hsh;
}

KMP

nxt[i]表示字符串s的前缀中,0~nxt[i]-1和i-nxt[i]+1~i两个子串是相同的

且nxt[i]最大and nxt[i]<=i

注释部分是在kmp用在纯匹配环节时用于加速的

非注释部分是用来求上文所说的nxt[i]的

void getnxt(string s, int nxt[]) {
  int i = 0, j = -1;
  nxt[0] = -1;
  while (i < (int)s.length()) {
    if (j == -1 || s[i] == s[j]) {
      nxt[++i] = ++j;
      // if (s[i] != s[j])
      //   nxt[++i] = ++j;
      // else
      //   nxt[++i] = nxt[++j];
    } else
      j = nxt[j];
  }
}

void match(string a, string b, int nxt[]) {
  int i = 0, j = 0;
  getnxt(b, nxt);
  while (i < (int)a.length()) {
    if (j == -1 || a[i] == b[j])
      ++i, ++j;
    else
      j = nxt[j];
    if (j == b.length()) cout << i - j + 1 << endl;
  }
}

扩展 KMP(Z 函数)

Z[i]表示0~z[i]-1和i+z[i]-1两个子串完全一样且z[i]最大

void Z_box(string s, int z[]) {
  z[0] = (int)s.length();
  while (s[z[1]] == s[z[1] + 1]) z[1]++;
  int nowright = 1 + z[1] - 1, nowleft = 1;
  for (int i = 2; i < s.length(); i++) {
    if (i <= nowright) {
      int p = i - nowleft;
      z[i] = min(z[p], nowright - i + 1);
    } else
      z[i] = 0;
    while (s[i + z[i]] == s[z[i]]) z[i]++;
    if (i + z[i] - 1 > nowright) {
      nowright = i + z[i] - 1;
      nowleft = i;
    }
  }
}

Manacher

m[i]表示以i为中心的最长回文串长度

显然只能表示奇数长度的回文串

void manacher(string s, int m[]) {
  int l = (int)s.length();
  m[0] = 1;
  int nowmid = 0, nowright = 0;
  for (int i = 1; i < l; i++) {
    if (i <= nowright) {
      int p = 2 * nowmid - i;
      m[i] = min(m[p], (nowright - i + 1) * 2 - 1);
    } else
      m[i] = 1;
    int x = m[i] / 2 + 1;
    while (i - x >= 0 && i + x < l && s[i - x] == s[i + x]) {
      x++;
      m[i] += 2;
    }
    if (i + x - 1 > nowright) {
      nowright = i + x - 1;
      nowmid = i;
    }
  }
}

马拉车算法辅助:既可求奇数长度,也可求偶数长度

这里ans 表示as的最长回文串长度

cin >> as;
for (int i = 0; i < as.length(); i++) {
  bs += '#';
  bs += as[i];
}
bs += '#';
manacher(bs, m);
int ans = 0;
for (int i = 0; i < bs.length(); i++) ans = max(ans, (m[i] - 1) / 2);
cout << ans << endl;

回文自动机(PAM)

len[i]表示第i个状态所表示的回文串长度

num[i]表示以节点i表示的最长回文串的最右端点为回文串结尾的回文串个数

cnt[i]第i个节点表示的回文串出现的次数

struct PAM {
  int nxt[N][26]{};
  int fail[N]{};
  int len[N]{};
  int cnt[N]{};
  int sum[N]{};
  int last{}, tot{};
  string s;  //插入后的字符串

  int newnode(int x) {
    len[++tot] = x;
    return tot;
  }

  void init() {
    memset(nxt, 0, sizeof(nxt));
    memset(fail, 0, sizeof(fail));
    memset(cnt, 0, sizeof(cnt));
    memset(len, 0, sizeof(len));
    s = "#";
    s[0] = -1;
    tot = -1;
    newnode(0), newnode(-1);
    fail[0] = 1;
    last = 0;
  }  //初始化

  int get_fail(int x, int k) {
    for (; s[k - len[x] - 1] != s[k]; x = fail[x])
      ;
    return x;
  }

  void add(int x, int k) {
    int cur = get_fail(last, k);
    if (!nxt[cur][x]) {
      int now = newnode(len[cur] + 2);
      fail[now] = nxt[get_fail(fail[cur], k)][x];
      sum[now] = sum[fail[now]] + 1;
      nxt[cur][x] = now;
    }
    cnt[last = nxt[cur][x]]++;
  }  // PAM上添加一个新点

  int get_longest_Palindrome() {
    int ans = 0;
    for (int i = tot; i >= 0; i--) ans = max(ans, len[i]);
    return ans;
  }  //最长回文串长度

  void build(const string& as) {
    init();
    s += as;
    for (int i = 1; i < s.length(); i++) add(s[i] - 'a', i);
    for (int i = tot; i >= 0; i--) cnt[fail[i]] += cnt[i];
  }  //回文自动机建立
} p;

AC自动机

struct trie {
  int cnt = 0;
  int root{};
  int next[N][26]{};
  int fail
      [N]{};  // fail[i]表示在结点i失配时跳转到具有最长公共前后缀的字符继续匹配
  int find[N]{};  // find[i]表示结点i向上跳fail依然有end标记
  int end[N]{};
  int que[N]{}, tal = 0, hed = 1;

  int new_node() {
    cnt++;
    for (int i = 0; i < 26; i++) next[cnt][i] = -1;
    fail[cnt] = 0;
    find[cnt] = 0;
    end[cnt] = 0;
    return cnt;
  }

  void new_root() {
    cnt = 0;
    root = new_node();
  }

  void add(string s, int tt) {
    int l = (int)s.length() - 1;
    int now = root;
    for (int i = 0; i <= l; i++) {
      if (next[now][s[i] - 'a'] == -1) next[now][s[i] - 'a'] = new_node();
      now = next[now][s[i] - 'a'];
    }
    end[now] = tt;
    ans[tt] = 0;
  }

  void build() {
    fail[root] = root;
    for (int i = 0; i < 26; i++)
      if (next[root][i] == -1)
        next[root][i] = root;
      else {
        fail[next[root][i]] = root;
        que[++tal] = next[root][i];
      }
    while (hed <= tal) {
      int tmp = que[hed++];
      for (int i = 0; i < 26; i++)
        if (next[tmp][i] == -1)
          next[tmp][i] = next[fail[tmp]][i];
        else {
          fail[next[tmp][i]] = next[fail[tmp]][i];
          que[++tal] = next[tmp][i];
          if (end[fail[next[tmp][i]]] || find[fail[next[tmp][i]]])
            find[next[tmp][i]] = 1;
        }
    }
  }

  void search(string b) {
    int l = b.length() - 1;
    int tmp = root;
    for (int i = 0; i <= l; i++) {
      tmp = next[tmp][b[i] - 'a'];
      int tep = tmp;
      while (tep != root && (find[tep] != 0 || end[tep] != 0)) {
        if (end[tep]) {
          ans[end[tep]]++;
          maxn = max(maxn, ans[end[tep]]);
        }
        tep = fail[tep];
      }
    }
  }
} t;

PAM后缀自动机

支持以下功能:

(1)寻找目标串是否在模式串中出现

(2)寻找目标串在模式串中第一次出现的位置

(3)统计模式串中完全不同的子串个数(也可以魔改后缀排序)

(4)统计模式串中完全不同的所有子串长度之和

(5)求出模式串所有子串中第k小的子串

(6)求出模式串所有子串中给定长度的最小子串

(7)求出某一子串在模式串中出现多少次

(8)不在模式串中出现的最短字符串

(9)两个字符串的最长公共子串

class sam {
 private:
  int len[maxn * 2 - 1];
  int link[maxn * 2 - 1];
  int tot, last;
  int nxt[maxn * 2 - 1][26];
  int d[maxn * 2 - 1];
  int dp[maxn * 2 - 1];
  int dp_ne[maxn * 2 - 1];
  int C[maxn * 2 - 1];
  int A[maxn * 2 - 1];
  int endpos[maxn * 2 - 1];
  int firstpos[maxn * 2 - 1];
  //备注:
  // endpos(v)表示字符串v在母串中所有结束位置下标的集合
  //这里的endpos[x]表示x结点上的状态的endpos集合的大小
  // firstpos[x]表示endpos集合中最靠前的
  // C,A是辅助数组
 public:
  void init() {
    tot = 0;
    last = 0;
    len[0] = 0;
    for (int i = 0; i < 26; i++) nxt[0][i] = -1;
    link[0] = -1;
  }

  void insert(int c) {
    int p = last, np = ++tot;
    last = np;
    len[np] = len[p] + 1;
    firstpos[np] = len[np] - 1;
    for (int i = 0; i < 26; i++) nxt[np][i] = -1;
    for (; p != -1 && nxt[p][c] == -1; p = link[p]) nxt[p][c] = np;
    if (p == -1)
      link[np] = 0;
    else {
      int q = nxt[p][c];
      if (len[p] + 1 == len[q])
        link[np] = q;
      else {
        int nq = ++tot;
        endpos[nq] = 0;
        len[nq] = len[p] + 1;
        firstpos[nq] = firstpos[q];
        memcpy(nxt[nq], nxt[q], sizeof(nxt[q]));
        link[nq] = link[q];
        link[q] = link[np] = nq;
        for (; nxt[p][c] == q; p = link[p]) nxt[p][c] = nq;
      }
    }
    endpos[np] = 1;
  }

  void build(const string &s) {
    init();
    for (char i : s) insert(i - 'a');
  }

  bool match(char s[]) {
    int l = strlen(s);
    int p = 0;
    for (int i = 0; i < l; i++) {
      if (nxt[p][s[i] - 'a'] != -1)
        p = nxt[p][s[i] - 'a'];
      else
        return false;
    }
    return true;
  }

  int get_first_occurrence(string s) {
    int l = s.length();
    int p = 0;
    for (int i = 0; i < l; i++) p = nxt[p][s[i] - 'a'];
    return firstpos[p] - l + 1;
  }

  int get_substring_num(int x) {
    int ans = 0;
    for (int i = 0; i < 26; i++)
      if (nxt[x][i] != -1) {
        ans += get_substring_num(nxt[x][i]);
      }
    return d[x] = ans + 1;
  }  //统计完全不同的子串个数(建议作为初始化使用)

  int get_substring_length(int x) {
    int ans = 0;
    for (int i = 0; i < 26; i++)
      if (nxt[x][i] != -1) {
        ans += d[nxt[x][i]] + get_substring_length(nxt[x][i]);
      }
    return dp[x] = ans;
  }  //统计完全不同的子串总长度(也建议作为初始化使用)

  void get_k_substring(int x, int k, char ans[], int &l) {
    if (k == 1) {
      return;
    }
    for (int i = 0; i < 26; i++)
      if (k > d[nxt[x][i]])
        k -= d[nxt[x][i]];
      else {
        ans[++l] = (char)(i + 'a');
        get_k_substring(nxt[x][i], k, ans, l);
      }
  }  //求出所有子串中第k小的子串

  void get_min_substring(int x, int k, char ans[], int &l) {
    for (int i = 0; i < 26; i++)
      if (nxt[x][i] != -1) {
        ans[++l] = (char)(i + 'a');
        if (k == l)
          return;
        else
          get_min_substring(nxt[x][i], k, ans, l);
      }
  }  //寻找给定长度的最小字串

  void get_cnt() {
    memset(C, 0, sizeof(C));
    memset(A, 0, sizeof(A));
    for (int i = 0; i <= tot; i++) C[len[i]]++;
    for (int i = 0; i <= tot; i++) C[i] += C[i - 1];
    for (int i = 0; i <= tot; i++) A[C[len[i]]--] = i;
    for (int i = tot; i >= 0; i--) {
      int now = A[i];
      endpos[link[now]] += endpos[now];
    }
  }  //求某一子串在模式串中出现多少次的前置函数条件(建议作为初始化使用)

  int get_match_num(string s) {
    int length = s.length();
    int p = 0;
    for (int i = 0; i < length; i++) {
      if (nxt[p][s[i] - 'a'] != -1)
        p = nxt[p][s[i] - 'a'];
      else
        return 0;
    }
    return endpos[p];
  }  //求某一子串在模式串中出现多少次

  void get_min_not_attend(int x) {
    dp_ne[x] = 1;
    for (int i = 0; i < 26; i++)
      if (nxt[x][i] != -1) {
        get_min_not_attend(nxt[x][i]);
        dp_ne[x] += dp_ne[nxt[x][i]];
      }
    // dp_ne[0]就是所求答案
  }  //可通过记录dp路径来得出不在文本中出现的最短字符串

  string get_longest_public_substring(string T) {
    int p = 0, l = 0, ans = 0, anspoint = 0;
    for (char i : T) {
      if (nxt[p][i - 'a'] != -1)
        p = nxt[p][i - 'a'], l++;
      else {
        while (nxt[p][i - 'a'] != -1 && p) {
          p = link[p];
          l = len[p];
        }
      }
      if (l > ans) {
        ans = l;
        anspoint = p;
      }
    }
    return T.substr(anspoint - ans + 1, ans);
  }  //最长公共子串

  LL get_max_multiple_length_and_occurrence() {
    long long ans = 0;
    for (int i = 0; i <= tot; i++)
      if (endpos[i] > 1) ans = max(1LL * endpos[i] * len[i], ans);
    return ans;
  }

  void print() {
    for (int i = 0; i <= tot; i++) {
      cout << i << ' ' << endpos[i] << ' ' << len[i] << endl;
    }
  }
};

最小表示法

int minshow(string s) {  //表示s的所有循环同构中最小的一个
  int i = 0, j = 1, k = 0, n = (int)s.length();
  while (i < n && j < n && k < n) {
    if (s[(i + k) % n] == s[(j + k) % n])
      k++;
    else {
      if (s[(i + k) % n] > s[(j + k) % n])
        i += (k + 1);
      else
        j += (k + 1);
      if (i == j) i++;
      k = 0;
    }
  }
  return min(i, j);
}

两字符串比较字典序大小($ \log n $) -> $ n(\log n)^2 $的字符串排序

需要hsh预处理

true -> a<b

bool compare_string(string a, string b, const ull hsha[], const ull hshb[]) {
  int l = 0, r = min(a.length(), b.length()) - 1;
  if (hsha[r] == hshb[r])
    return a.length() < b.length();
  else {
    while (l < r) {
      int mid = (l + r) >> 1;
      if (hsha[mid] == hshb[mid])
        l = mid + 1;
      else
        r = mid - 1;
    }
    return a[l] < b[l];
  }
}
Hello, world!
最后更新于 2021-10-18