字符串
Hash->12种hash算法,有两到三种比较方便处理前缀hash和(BKDR和DJB已经写出)
typedef unsigned long long ull;
const ull p = 131;
const int N = 2e6 + 10;
ull BKDR_hsh[N];
ull DJB_hsh[N];
ull BKDR_hash(string s) {
ull tmp = 0;
for (int i = 0; i < s.length(); i++) {
BKDR_hsh[i] = tmp * p + s[i] - 'a' + 1;
tmp = BKDR_hsh[i];
}
return BKDR_hsh[s.length() - 1];
} //字符串hash + 前缀和hash处理
ull AP_hash(string s) {
ull hsh = 0;
for (int i = 0; i < s.length(); i++) {
if (i & 1)
hsh ^= ((hsh << 11) ^ (s[i] - 'a' + 1) ^ (hsh >> 5));
else
hsh ^= ((hsh << 7) ^ (s[i] - 'a' + 1) ^ (hsh >> 3));
}
return hsh;
}
ull DJB_hash(string s) {
ull hsh = 5381;
for (int i = 0; i < s.length(); i++) {
hsh = hsh * 32 + s[i] - 'a' + 1;
DJB_hsh[i] = hsh;
}
return DJB_hsh[s.length() - 1];
}
ull JS_hash(const string& s) {
ull hsh = 1315423911;
for (char i : s) hsh ^= ((hsh << 5) + (i - 'a' + 1) + (hsh >> 2));
return hsh;
}
ull RS_hash(const string& s) {
ull b = 378551;
ull a = 63689;
ull hsh = 0;
for (char i : s) {
hsh = hsh * a + i - 'a' + 1;
a *= b;
}
return hsh;
}
ull SDB_hash(const string& s) {
ull hsh = 0;
for (char i : s)
// equivalent to: hash = 65599*hash + (*str++);
hsh = (i - 'a' + 1) + (hsh << 6) + (hsh << 16) - hsh;
return hsh;
}
ull PJWHash(string s) {
ull BitsInUnignedInt = sizeof(ull) * 8;
ull ThreeQuarters = (ull)((BitsInUnignedInt * 3) / 4);
ull OneEighth = (ull)(BitsInUnignedInt / 8);
ull HighBits = (ull)(0xFFFFFFFF) << (BitsInUnignedInt - OneEighth);
ull hsh = 0;
ull test;
for (char i : s) {
hsh = (hsh << OneEighth) + i - 'a' + 1;
if ((test = hsh & HighBits) != 0)
hsh = ((hsh ^ (test >> ThreeQuarters)) & (~HighBits));
}
return hsh;
}
ull ELFHash(string s) {
ull hsh = 0;
ull x;
for (char i : s) {
hsh = (hsh << 4) + i - 'a' + 1;
if ((x = hsh & 0xF0000000L) != 0) {
hsh ^= (x >> 24);
hsh &= ~x;
}
}
return hsh;
}
ull DEKhash(string s) {
ull hsh = s.length();
for (char i : s) hsh = (hsh << 5) ^ (hsh >> 27) ^ (ull)(i - 'a' + 1);
return hsh;
}
ull BPhash(string s) {
ull hsh = s.length();
for (char i : s) {
hsh = (hsh << 7) ^ (ull)(i - 'a' + 1);
}
return hsh;
}
ull FNVhash(string s) {
ull fnvprime = 0x811C9DC5;
ull hsh = 0;
for (char i : s) {
hsh *= fnvprime;
hsh ^= (ull)(i - 'a' + 1);
}
return hsh;
}
ull JAVAhash(string s) {
ull hsh = 0;
for (char i : s) hsh = hsh * 31 + (ull)(i - 'a' + 1);
return hsh;
}
KMP
nxt[i]表示字符串s的前缀中,0~nxt[i]-1和i-nxt[i]+1~i两个子串是相同的
且nxt[i]最大and nxt[i]<=i
注释部分是在kmp用在纯匹配环节时用于加速的
非注释部分是用来求上文所说的nxt[i]的
void getnxt(string s, int nxt[]) {
int i = 0, j = -1;
nxt[0] = -1;
while (i < (int)s.length()) {
if (j == -1 || s[i] == s[j]) {
nxt[++i] = ++j;
// if (s[i] != s[j])
// nxt[++i] = ++j;
// else
// nxt[++i] = nxt[++j];
} else
j = nxt[j];
}
}
void match(string a, string b, int nxt[]) {
int i = 0, j = 0;
getnxt(b, nxt);
while (i < (int)a.length()) {
if (j == -1 || a[i] == b[j])
++i, ++j;
else
j = nxt[j];
if (j == b.length()) cout << i - j + 1 << endl;
}
}
扩展 KMP(Z 函数)
Z[i]表示0~z[i]-1和i+z[i]-1两个子串完全一样且z[i]最大
void Z_box(string s, int z[]) {
z[0] = (int)s.length();
while (s[z[1]] == s[z[1] + 1]) z[1]++;
int nowright = 1 + z[1] - 1, nowleft = 1;
for (int i = 2; i < s.length(); i++) {
if (i <= nowright) {
int p = i - nowleft;
z[i] = min(z[p], nowright - i + 1);
} else
z[i] = 0;
while (s[i + z[i]] == s[z[i]]) z[i]++;
if (i + z[i] - 1 > nowright) {
nowright = i + z[i] - 1;
nowleft = i;
}
}
}
Manacher
m[i]表示以i为中心的最长回文串长度
显然只能表示奇数长度的回文串
void manacher(string s, int m[]) {
int l = (int)s.length();
m[0] = 1;
int nowmid = 0, nowright = 0;
for (int i = 1; i < l; i++) {
if (i <= nowright) {
int p = 2 * nowmid - i;
m[i] = min(m[p], (nowright - i + 1) * 2 - 1);
} else
m[i] = 1;
int x = m[i] / 2 + 1;
while (i - x >= 0 && i + x < l && s[i - x] == s[i + x]) {
x++;
m[i] += 2;
}
if (i + x - 1 > nowright) {
nowright = i + x - 1;
nowmid = i;
}
}
}
马拉车算法辅助:既可求奇数长度,也可求偶数长度
这里ans 表示as的最长回文串长度
cin >> as;
for (int i = 0; i < as.length(); i++) {
bs += '#';
bs += as[i];
}
bs += '#';
manacher(bs, m);
int ans = 0;
for (int i = 0; i < bs.length(); i++) ans = max(ans, (m[i] - 1) / 2);
cout << ans << endl;
回文自动机(PAM)
len[i]表示第i个状态所表示的回文串长度
num[i]表示以节点i表示的最长回文串的最右端点为回文串结尾的回文串个数
cnt[i]第i个节点表示的回文串出现的次数
struct PAM {
int nxt[N][26]{};
int fail[N]{};
int len[N]{};
int cnt[N]{};
int sum[N]{};
int last{}, tot{};
string s; //插入后的字符串
int newnode(int x) {
len[++tot] = x;
return tot;
}
void init() {
memset(nxt, 0, sizeof(nxt));
memset(fail, 0, sizeof(fail));
memset(cnt, 0, sizeof(cnt));
memset(len, 0, sizeof(len));
s = "#";
s[0] = -1;
tot = -1;
newnode(0), newnode(-1);
fail[0] = 1;
last = 0;
} //初始化
int get_fail(int x, int k) {
for (; s[k - len[x] - 1] != s[k]; x = fail[x])
;
return x;
}
void add(int x, int k) {
int cur = get_fail(last, k);
if (!nxt[cur][x]) {
int now = newnode(len[cur] + 2);
fail[now] = nxt[get_fail(fail[cur], k)][x];
sum[now] = sum[fail[now]] + 1;
nxt[cur][x] = now;
}
cnt[last = nxt[cur][x]]++;
} // PAM上添加一个新点
int get_longest_Palindrome() {
int ans = 0;
for (int i = tot; i >= 0; i--) ans = max(ans, len[i]);
return ans;
} //最长回文串长度
void build(const string& as) {
init();
s += as;
for (int i = 1; i < s.length(); i++) add(s[i] - 'a', i);
for (int i = tot; i >= 0; i--) cnt[fail[i]] += cnt[i];
} //回文自动机建立
} p;
AC自动机
struct trie {
int cnt = 0;
int root{};
int next[N][26]{};
int fail
[N]{}; // fail[i]表示在结点i失配时跳转到具有最长公共前后缀的字符继续匹配
int find[N]{}; // find[i]表示结点i向上跳fail依然有end标记
int end[N]{};
int que[N]{}, tal = 0, hed = 1;
int new_node() {
cnt++;
for (int i = 0; i < 26; i++) next[cnt][i] = -1;
fail[cnt] = 0;
find[cnt] = 0;
end[cnt] = 0;
return cnt;
}
void new_root() {
cnt = 0;
root = new_node();
}
void add(string s, int tt) {
int l = (int)s.length() - 1;
int now = root;
for (int i = 0; i <= l; i++) {
if (next[now][s[i] - 'a'] == -1) next[now][s[i] - 'a'] = new_node();
now = next[now][s[i] - 'a'];
}
end[now] = tt;
ans[tt] = 0;
}
void build() {
fail[root] = root;
for (int i = 0; i < 26; i++)
if (next[root][i] == -1)
next[root][i] = root;
else {
fail[next[root][i]] = root;
que[++tal] = next[root][i];
}
while (hed <= tal) {
int tmp = que[hed++];
for (int i = 0; i < 26; i++)
if (next[tmp][i] == -1)
next[tmp][i] = next[fail[tmp]][i];
else {
fail[next[tmp][i]] = next[fail[tmp]][i];
que[++tal] = next[tmp][i];
if (end[fail[next[tmp][i]]] || find[fail[next[tmp][i]]])
find[next[tmp][i]] = 1;
}
}
}
void search(string b) {
int l = b.length() - 1;
int tmp = root;
for (int i = 0; i <= l; i++) {
tmp = next[tmp][b[i] - 'a'];
int tep = tmp;
while (tep != root && (find[tep] != 0 || end[tep] != 0)) {
if (end[tep]) {
ans[end[tep]]++;
maxn = max(maxn, ans[end[tep]]);
}
tep = fail[tep];
}
}
}
} t;
PAM后缀自动机
支持以下功能:
(1)寻找目标串是否在模式串中出现
(2)寻找目标串在模式串中第一次出现的位置
(3)统计模式串中完全不同的子串个数(也可以魔改后缀排序)
(4)统计模式串中完全不同的所有子串长度之和
(5)求出模式串所有子串中第k小的子串
(6)求出模式串所有子串中给定长度的最小子串
(7)求出某一子串在模式串中出现多少次
(8)不在模式串中出现的最短字符串
(9)两个字符串的最长公共子串
class sam {
private:
int len[maxn * 2 - 1];
int link[maxn * 2 - 1];
int tot, last;
int nxt[maxn * 2 - 1][26];
int d[maxn * 2 - 1];
int dp[maxn * 2 - 1];
int dp_ne[maxn * 2 - 1];
int C[maxn * 2 - 1];
int A[maxn * 2 - 1];
int endpos[maxn * 2 - 1];
int firstpos[maxn * 2 - 1];
//备注:
// endpos(v)表示字符串v在母串中所有结束位置下标的集合
//这里的endpos[x]表示x结点上的状态的endpos集合的大小
// firstpos[x]表示endpos集合中最靠前的
// C,A是辅助数组
public:
void init() {
tot = 0;
last = 0;
len[0] = 0;
for (int i = 0; i < 26; i++) nxt[0][i] = -1;
link[0] = -1;
}
void insert(int c) {
int p = last, np = ++tot;
last = np;
len[np] = len[p] + 1;
firstpos[np] = len[np] - 1;
for (int i = 0; i < 26; i++) nxt[np][i] = -1;
for (; p != -1 && nxt[p][c] == -1; p = link[p]) nxt[p][c] = np;
if (p == -1)
link[np] = 0;
else {
int q = nxt[p][c];
if (len[p] + 1 == len[q])
link[np] = q;
else {
int nq = ++tot;
endpos[nq] = 0;
len[nq] = len[p] + 1;
firstpos[nq] = firstpos[q];
memcpy(nxt[nq], nxt[q], sizeof(nxt[q]));
link[nq] = link[q];
link[q] = link[np] = nq;
for (; nxt[p][c] == q; p = link[p]) nxt[p][c] = nq;
}
}
endpos[np] = 1;
}
void build(const string &s) {
init();
for (char i : s) insert(i - 'a');
}
bool match(char s[]) {
int l = strlen(s);
int p = 0;
for (int i = 0; i < l; i++) {
if (nxt[p][s[i] - 'a'] != -1)
p = nxt[p][s[i] - 'a'];
else
return false;
}
return true;
}
int get_first_occurrence(string s) {
int l = s.length();
int p = 0;
for (int i = 0; i < l; i++) p = nxt[p][s[i] - 'a'];
return firstpos[p] - l + 1;
}
int get_substring_num(int x) {
int ans = 0;
for (int i = 0; i < 26; i++)
if (nxt[x][i] != -1) {
ans += get_substring_num(nxt[x][i]);
}
return d[x] = ans + 1;
} //统计完全不同的子串个数(建议作为初始化使用)
int get_substring_length(int x) {
int ans = 0;
for (int i = 0; i < 26; i++)
if (nxt[x][i] != -1) {
ans += d[nxt[x][i]] + get_substring_length(nxt[x][i]);
}
return dp[x] = ans;
} //统计完全不同的子串总长度(也建议作为初始化使用)
void get_k_substring(int x, int k, char ans[], int &l) {
if (k == 1) {
return;
}
for (int i = 0; i < 26; i++)
if (k > d[nxt[x][i]])
k -= d[nxt[x][i]];
else {
ans[++l] = (char)(i + 'a');
get_k_substring(nxt[x][i], k, ans, l);
}
} //求出所有子串中第k小的子串
void get_min_substring(int x, int k, char ans[], int &l) {
for (int i = 0; i < 26; i++)
if (nxt[x][i] != -1) {
ans[++l] = (char)(i + 'a');
if (k == l)
return;
else
get_min_substring(nxt[x][i], k, ans, l);
}
} //寻找给定长度的最小字串
void get_cnt() {
memset(C, 0, sizeof(C));
memset(A, 0, sizeof(A));
for (int i = 0; i <= tot; i++) C[len[i]]++;
for (int i = 0; i <= tot; i++) C[i] += C[i - 1];
for (int i = 0; i <= tot; i++) A[C[len[i]]--] = i;
for (int i = tot; i >= 0; i--) {
int now = A[i];
endpos[link[now]] += endpos[now];
}
} //求某一子串在模式串中出现多少次的前置函数条件(建议作为初始化使用)
int get_match_num(string s) {
int length = s.length();
int p = 0;
for (int i = 0; i < length; i++) {
if (nxt[p][s[i] - 'a'] != -1)
p = nxt[p][s[i] - 'a'];
else
return 0;
}
return endpos[p];
} //求某一子串在模式串中出现多少次
void get_min_not_attend(int x) {
dp_ne[x] = 1;
for (int i = 0; i < 26; i++)
if (nxt[x][i] != -1) {
get_min_not_attend(nxt[x][i]);
dp_ne[x] += dp_ne[nxt[x][i]];
}
// dp_ne[0]就是所求答案
} //可通过记录dp路径来得出不在文本中出现的最短字符串
string get_longest_public_substring(string T) {
int p = 0, l = 0, ans = 0, anspoint = 0;
for (char i : T) {
if (nxt[p][i - 'a'] != -1)
p = nxt[p][i - 'a'], l++;
else {
while (nxt[p][i - 'a'] != -1 && p) {
p = link[p];
l = len[p];
}
}
if (l > ans) {
ans = l;
anspoint = p;
}
}
return T.substr(anspoint - ans + 1, ans);
} //最长公共子串
LL get_max_multiple_length_and_occurrence() {
long long ans = 0;
for (int i = 0; i <= tot; i++)
if (endpos[i] > 1) ans = max(1LL * endpos[i] * len[i], ans);
return ans;
}
void print() {
for (int i = 0; i <= tot; i++) {
cout << i << ' ' << endpos[i] << ' ' << len[i] << endl;
}
}
};
最小表示法
int minshow(string s) { //表示s的所有循环同构中最小的一个
int i = 0, j = 1, k = 0, n = (int)s.length();
while (i < n && j < n && k < n) {
if (s[(i + k) % n] == s[(j + k) % n])
k++;
else {
if (s[(i + k) % n] > s[(j + k) % n])
i += (k + 1);
else
j += (k + 1);
if (i == j) i++;
k = 0;
}
}
return min(i, j);
}
两字符串比较字典序大小($ \log n $) -> $ n(\log n)^2 $的字符串排序
需要hsh预处理
true -> a<b
bool compare_string(string a, string b, const ull hsha[], const ull hshb[]) {
int l = 0, r = min(a.length(), b.length()) - 1;
if (hsha[r] == hshb[r])
return a.length() < b.length();
else {
while (l < r) {
int mid = (l + r) >> 1;
if (hsha[mid] == hshb[mid])
l = mid + 1;
else
r = mid - 1;
}
return a[l] < b[l];
}
}
Comments NOTHING