字符串模板 | vegeone
0%

字符串模板

以下是我的字符串模板:编辑距离、后缀数组、回文树、扩展KMP、字符串哈希、AC自动机、Border、KMP、Manacher、SAM

编辑距离

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#include<bits/stdc++.h>
#define int long long
using namespace std;
#define endl '\n'
const int N=2e3+10;
typedef pair<int,int>pp;
int f[N][N];
signed main()
{
ios::sync_with_stdio(false);
cin.tie(0);
cout.tie(0);

string s,t;
cin>>s>>t;
int n=s.size(),m=t.size();
for(int i=0;i<=n;++i)
f[i][0]=i;
for(int i=0;i<=m;++i)
f[0][i]=i;
for(int i=1;i<=n;++i){
for(int j=1;j<=m;++j){
f[i][j]=min(f[i-1][j],f[i][j-1])+1;
f[i][j]=min(f[i][j],f[i-1][j-1]+(s[i-1]!=t[j-1]));
//cerr<<i<<" "<<j<<" "<<f[i][j]<<endl;
}
}
cout<<f[n][m]<<endl;

return 0;
}

后缀数组

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
/*https://www.luogu.com.cn/problem/P3809*/
#include<bits/stdc++.h>
#define endl '\n'
using namespace std;

const int N=2e6+10;
char s[N];
int n,m;//n为后缀个数, m为桶的个数
int idd[N],cnt[N],sa[N],rk[N],oldrk[N],key1[N];
int height[N];
//桶数组rk[i],辅助数组idd[i],计数数组cnt[i]
bool cmp(int x, int y, int w) {
return oldrk[x] == oldrk[y] && oldrk[x + w] == oldrk[y + w];
}
void get_sa(){
int i,p,w;
for (i = 1; i <= n; ++i) ++cnt[rk[i] = s[i]];
for (i = 1; i <= m; ++i) cnt[i] += cnt[i - 1];
for (i = n; i >= 1; --i) sa[cnt[rk[i]]--] = i;

for (w = 1;; w <<= 1, m = p) { // m=p 就是优化计数排序值域
for (p = 0, i = n; i > n - w; --i) idd[++p] = i;
for (i = 1; i <= n; ++i)
if (sa[i] > w) idd[++p] = sa[i] - w;

memset(cnt, 0, sizeof(cnt));
for (i = 1; i <= n; ++i) ++cnt[key1[i] = rk[idd[i]]];
// 注意这里px[i] != i,因为rk没有更新,是上一轮的排名数组

for (i = 1; i <= m; ++i) cnt[i] += cnt[i - 1];
for (i = n; i >= 1; --i) sa[cnt[key1[i]]--] = idd[i];
memcpy(oldrk + 1, rk + 1, n * sizeof(int));
for (p = 0, i = 1; i <= n; ++i)
rk[sa[i]] = cmp(sa[i], sa[i - 1], w) ? p : ++p;
if (p == n) {
break;
}
}
}
void get_height(){
int i,j,k;
for(i=1;i<=n;i++)rk[sa[i]]=i;
for(i=1,k=0;i<=n;i++){ //枚举后缀i
if(rk[i]==1)continue;//第一名height为0
if(k)k--;//上一个后缀的height值减1
int j=sa[rk[i]-1];//找出后缀i的前邻后缀j
while(i+k<=n&&j+k<=n&&s[i+k]==s[j+k])k++;
height[rk[i]]=k;
}
}
int main(){
ios::sync_with_stdio(false);
cin.tie(0),cout.tie(0);

cin>>s+1;
n=strlen(s+1); m=127;
get_sa();
//get_height();
for(int i=1;i<=n;i++)cout<<sa[i]<<" "<<rk[i]<<" \n";
//for(int i=1;i<=n;i++)cout<<height[i]<<" \n"[i==n];

return 0;
}

回文树

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
/*https://www.luogu.com.cn/problem/P5496*/
#include<bits/stdc++.h>
#define int long long
using namespace std;
#define endl '\n'
const int N=1e5+10;
typedef pair<int,int>pp;
int a[N];
struct PAM {
static constexpr int M = 26;
int n;
vector<array<int, M>> t;
vector<int> link, len, s, trans, val, count;
int cur, cnt;

PAM(){}
PAM(int x) {init(x);}

void init(int x) {
n = x + 2;
t.assign(n, {});
link.assign(n, 0);//fail指针
count.assign(n, 0);
val.assign(n, 0);//回文串个数
len.assign(n, 0);
s.assign(n, -1);
trans.assign(n, 0);
cur = 0;
cnt = 2;
len[1] = -1;
link[0] = link[1] = 1;
}


int extend(int p, int c) {
s[++cur] = c;
int now = getfail(p);
if (!t[now][c]) {
int u = cnt++;
len[u] = len[now] + 2;
link[u] = t[getfail(link[now])][c];
/*if (len[u] < 2) {
trans[u] = link[u];
} else {
int tmp = trans[now];
while (s[cur - len[tmp] - 1] != s[cur] || ((len[tmp] + 2) << 1) > len[u]) tmp = link[tmp];
//len[tmp]+2
trans[u] = t[tmp][c];
val[u] = val[link[u]] + 1;

}
count[u] = 2 * len[trans[u]] == len[u];
count[u] += count[link[u]];*/
val[u] = val[link[u]] + 1;
t[now][c] = u;
}
p = t[now][c];
//val[p]++;
return p;
}

int getfail(int x) {
while (s[cur - len[x] - 1] != s[cur]) x = link[x];
return x;
}

} pam;
void solve()
{
string str;
cin>>str;
int n=str.size();
pam.init(n);
int k=0;
int p=1;
for(int i=0;i<n;++i){
char ch=(str[i]-97+k)%26+97;
p=pam.extend(p,ch-'a');
k=pam.val[p];
cout<<k<<" ";
}
cout<<endl;
}
signed main()
{
ios::sync_with_stdio(false);
cin.tie(0);
cout.tie(0);
int T=1;
//cin>>T;
while(T--) solve();
return 0;
}

扩展KMP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/*https://www.luogu.com.cn/problem/P5410*/
#include<bits/stdc++.h>
#define endl '\n'
using namespace std;
const int N=2e7+5;
string t,s;
int z[N],p[N];

void get_z(string s,int n){
z[1]=n;
for(int i=2,l,r=0;i<=n;i++){
if(i<=r)z[i]=min(z[i-l+1],r-i+1);
while(s[1+z[i]]==s[i+z[i]])z[i]++;
if(i+z[i]-1>r)l=i,r=i+z[i]-1;
}
}
void get_p(string s,int n,string t,int m){
for(int i=1,l,r=0;i<=m;i++){
if(i<=r)p[i]=min(z[i-l+1],r-i+1);
while(1+p[i]<=n&&i+p[i]<=m&&s[1+p[i]]==t[i+p[i]])p[i]++;
if(i+p[i]-1>r)l=i,r=i+p[i]-1;
}
}
std::vector<int> zFunction(std::string s) {//0-base jiangly
int n = s.size();
std::vector<int> z(n + 1);
z[0] = n;
for (int i = 1, j = 1; i < n; i++) {
z[i] = std::max(0, std::min(j + z[j] - i, z[i - j]));
while (i + z[i] < n && s[z[i]] == s[i + z[i]]) {
z[i]++;
}
if (i + z[i] > j + z[j]) {
j = i;
}
}
return z;
}
int main(){
ios::sync_with_stdio(false);
cin.tie(0);
cout.tie(0);

cin>>t>>s;
t=' '+t,s=' '+s;
int m=t.size()-1,n=s.size()-1;
get_z(s,n);
get_p(s,n,t,m);

long long ans1=0,ans2=0;
for(int i=1; i<=n; i++)
ans1^=1LL*i*(z[i]+1);
for(int i=1; i<=m; i++)
ans2^=1LL*i*(p[i]+1);
cout<<ans1<<endl<<ans2;
return 0;
}

字符串哈希

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/*https://www.acwing.com/problem/content/843/*/
/*https://www.acwing.com/problem/content/843/*/
#include<bits/stdc++.h>
#define int long long
#define endl '\n'
using namespace std;
const int p=131,N=1e5+10;
int n,m;
char str[N];
int pp[N],h[N];
void init()
{
for(int i=1;i<=n;++i){
pp[i]=pp[i-1]*p;
h[i]=h[i-1]*p+str[i];
//cout<<i<<" "<<h[i]<<endl;
}
}
int query(int l,int r){
//cout<<l<<" "<<r<<" "<< " "<<h[l-1]*pp[r-l+1]<<" "<<h[r]-h[l-1]*pp[r-l+1]<<endl;
return h[r]-h[l-1]*pp[r-l+1];
}
signed main()
{
ios::sync_with_stdio(false);
cin.tie(0);
cout.tie(0);

cin>>n>>m;
cin>>str+1;
pp[0]=1;
init();
for(int i=1;i<=m;++i){
int l1,r1,l2,r2;
cin>>l1>>r1>>l2>>r2;
if(query(l1,r1)==query(l2,r2))cout<<"Yes"<<endl;
else cout<<"No"<<endl;
}

return 0;
}

AC自动机

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/*https://www.luogu.com.cn/problem/P5357*/
#include<bits/stdc++.h>
#define endl '\n'
using namespace std;

typedef char chr;
typedef deque<int> dic;
const int maxN = 2e5;
const int maxS = 2e5;
const int maxT = 2e6;

int n;
chr s[maxS + 10];
chr t[maxT + 10];
int cnt[maxN + 10];

struct AhoCorasickAutomaton {
struct Node {
int son[30];
int val;
int fail;
int head;
dic index;
} node[maxS + 10];

struct Edge {
int head;
int next;
} edge[maxS + 10];

int root;
int ncnt;
int ecnt;

void Insert(chr *str, int i) {
int u = root;
int m=strlen(str+1);
for (int i = 1; i<=m; i++) {
if (node[u].son[str[i] - 'a' + 1] == 0)
node[u].son[str[i] - 'a' + 1] = ++ncnt;
u = node[u].son[str[i] - 'a' + 1];
}
node[u].index.push_back(i);
return;
}

void Build() {
dic q;
for (int i = 1; i <= 26; i++)
if (node[root].son[i]) q.push_back(node[root].son[i]);
while (!q.empty()) {
int u = q.front();
q.pop_front();
for (int i = 1; i <= 26; i++) {
if (node[u].son[i]) {
node[node[u].son[i]].fail = node[node[u].fail].son[i];
q.push_back(node[u].son[i]);
} else {
node[u].son[i] = node[node[u].fail].son[i];
}
}
}
return;
}

void Query(chr *str) {
int u = root;
int m=strlen(str+1);
for (int i = 1; i<=m; i++) {
u = node[u].son[str[i] - 'a' + 1];
node[u].val++;
}
return;
}

void addEdge(int tail, int head) {
ecnt++;
edge[ecnt].head = head;
edge[ecnt].next = node[tail].head;
node[tail].head = ecnt;
return;
}

void DFS(int u) {
for (int e = node[u].head; e; e = edge[e].next) {
int v = edge[e].head;
DFS(v);
node[u].val += node[v].val;
}
for (auto i : node[u].index) cnt[i] += node[u].val;
return;
}

void FailTree() {
for (int u = 1; u <= ncnt; u++) addEdge(node[u].fail, u);
DFS(root);
return;
}
} ACM;

int main() {
ios::sync_with_stdio(0);
cin.tie(0);
cout.tie(0);
cin >> n;
for (int i = 1; i <= n; i++) {
cin >> (s + 1);
ACM.Insert(s, i);
}
ACM.Build();
cin >> (t + 1);
ACM.Query(t);
ACM.FailTree();
for (int i = 1; i <= n; i++)cout << cnt[i] << endl;
return 0;
}
/*
#include <bits/stdc++.h>
#define maxn 8000001
using namespace std;
char s[maxn];
int n, cnt, vis[maxn], rev[maxn], indeg[maxn], ans;

struct trie_node {
int son[27];
int fail;
int flag;
int ans;

void init() {
memset(son, 0, sizeof(son));
fail = flag = 0;
}
} trie[maxn];

queue<int> q;

void init() {
for (int i = 0; i <= cnt; i++) trie[i].init();
for (int i = 1; i <= n; i++) vis[i] = 0;
cnt = 1;
ans = 0;
}

void insert(char *s, int num) {
int u = 1, len = strlen(s);
for (int i = 0; i < len; i++) {
int v = s[i] - 'a';
if (!trie[u].son[v]) trie[u].son[v] = ++cnt;
u = trie[u].son[v];
}
if (!trie[u].flag) trie[u].flag = num;
rev[num] = trie[u].flag;
return;
}

void getfail(void) {
for (int i = 0; i < 26; i++) trie[0].son[i] = 1;
q.push(1);
trie[1].fail = 0;
while (!q.empty()) {
int u = q.front();
q.pop();
int Fail = trie[u].fail;
for (int i = 0; i < 26; i++) {
int v = trie[u].son[i];
if (!v) {
trie[u].son[i] = trie[Fail].son[i];
continue;
}
trie[v].fail = trie[Fail].son[i];
indeg[trie[Fail].son[i]]++;
q.push(v);
}
}
}

void topu() {
for (int i = 1; i <= cnt; i++)
if (!indeg[i]) q.push(i);
while (!q.empty()) {
int fr = q.front();
q.pop();
vis[trie[fr].flag] = trie[fr].ans;
int u = trie[fr].fail;
trie[u].ans += trie[fr].ans;
if (!(--indeg[u])) q.push(u);
}
}

void query(char *s) {
int u = 1, len = strlen(s);
for (int i = 0; i < len; i++) u = trie[u].son[s[i] - 'a'], trie[u].ans++;
}

int main() {
scanf("%d", &n);
init();
for (int i = 1; i <= n; i++) scanf("%s", s), insert(s, i);
getfail();
scanf("%s", s);
query(s);
topu();
for (int i = 1; i <= n; i++) cout << vis[rev[i]] << std::endl;
return 0;
}*/

Border

引理1:如果有一个border 长度大于 的一半,可以得出得 有周期 ||−||
引理2:如果 p,q都为周期,则 gcd(p,q)也为周期
引理3:字符串所有不小于 ||一半的border构成一个等差数列
引理4:可以把字符串分成 log||段,每一段的border都是一个等差数列

在KMP匹配中,我们可以利用这个性质快速跳过一串border
具体而言,在一次跳border时,如果发现border长度不小于原串的一半,则接下来的border构成等差数列,直到一半以下(引理3)
可以直接跳到 处,即比一半大的第一个位置(整除)(网上博客直接跳到了 处,经过几道题检验也是对的,但不是很能理解)

一次至少跳一半,保证 次以内可以跳完

KMP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/*https://www.acwing.com/problem/content/description/833/*/
#include<bits/stdc++.h>
#define int long long
#define endl '\n'
using namespace std;

class KMP
{
vector<int> nx;
string b;
public:
KMP(string b)
{
this->b = b;
int n = b.length();
int j = 0;
nx.resize(n);
for (int i = 1; i < n; i++)
{

while (j > 0 && b[i] != b[j])
j = nx[j - 1];
if (b[i] == b[j])
j++;
nx[i] = j;
}
}
int find(string a) // a中出现多少次b
{
int n = b.length(), m = a.length();
int j = 0;
int ans = 0;
for (int i = 0; i < m; i++)
{
while (j > 0 && a[i] != b[j])
j = nx[j - 1];
if (a[i] == b[j])
j++;
if (j == n)
{
ans++;
j = nx[j - 1];
}
}
return ans;
}
};
int m,n;
const int N=1e5+10,M=1e6+10;
int nex[N];
char p[N],s[M];
signed main()
{
ios::sync_with_stdio(false);
cin.tie(0);
cout.tie(0);

cin>>n>>p+1>>m>>s+1;//若无输入字符串长度先提前存储长度,否则Strlen函数将花费大量时间

for(int i=2,j=0;i<=n;++i){
while(j&&p[j+1]!=p[i])j=nex[j];
if(p[j+1]==p[i])++j;
nex[i]=j;
}
for(int i=1,j=0;i<=m;++i){
while(j&&p[j+1]!=s[i])j=nex[j];
if(p[j+1]==s[i])++j;
if(j==n){
//匹配成功
cout<<i-n<<" ";
j=nex[j];
}
}
cout<<endl;
return 0;
}
/*3
aba
5
ababa*/

Manacher

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/*https://www.luogu.com.cn/problem/P3805*/
#include<bits/stdc++.h>
using namespace std;
#define endl '\n'
const int N=11e6+10;
string str,ss;
int d[N*2],ans=1;//ans包括#
inline void getd(string s){
d[1]=1;
for(int i=2,l,r=1;i<s.size();++i){
if(i<=r)d[i]=min(d[r-i+l],r-i+1);
while(s[i-d[i]]==s[i+d[i]])d[i]++;
if(i+d[i]-1>r)l=i-d[i]+1,r=i+d[i]-1;
ans=max(ans,d[i]);
}
}

int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
cout.tie(0);
cin>>str;
ss+='$';
ss+='#';
for(int i=0;i<str.size();++i){
ss+=str[i];
ss+='#';
}
getd(ss);
cout<<ans-1<<endl;

return 0;
}

SAM

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*https://www.luogu.com.cn/problem/P3804*/
#include<bits/stdc++.h>
// #define int long long
using namespace std;
#define endl '\n'
typedef long long ll;
const int NN=2e6+10;
typedef pair<int,int>pp;
struct SAM {
static const int M = 26, N = 1e6;
struct Node {
int len;
int link;
int sz;
array<int, M> nxt;
Node() = default;
} t[2 * N];
int cnt;
SAM() {init();}
int newNode() {
int u = cnt++;
t[u].len = t[u].link = 0;
t[u].nxt.fill(0);
t[u].sz=0;
// t[u].sz=1;
return u;
}
void init() {
cnt = 0;
newNode();
newNode();
t[0].nxt.fill(1);
t[0].len = -1;
}
int extend(int p, int c) {
// cerr<<p<<" "<<t[p].nxt[c]<<" "<<t[p].len<<" "<<t[p].link<<endl;
if (t[p].nxt[c]) {
int q = t[p].nxt[c];
if (t[q].len == t[p].len + 1)
return q;
int r = newNode();
t[r] = t[q];
t[r].sz=0;
t[r].len = t[p].len + 1;
t[q].link = r;
while (t[p].nxt[c] == q) {
t[p].nxt[c] = r;
p = t[p].link;
}
return r;
}
int cur = newNode();
// cerr<<"cur:"<<cur<<" "<<cnt<<endl;
t[cur].len = t[p].len + 1;
t[cur].sz=1;

while (!t[p].nxt[c]) {
t[p].nxt[c] = cur;
p = t[p].link;
}
t[cur].link = extend(p, c);
// cerr<<"cur"<<cur<<" "<<t[cur].link<<" "<<t[cur].len<<endl;
return cur;
}

int extend(int p, char c, char offset = 'a') {
return extend(p, c - offset);
}
int nxt(int p, int x) {
return t[p].nxt[x];
}
int nxt(int p, char c, char offset = 'a') {
return nxt(p, c - 'a');
}
int link(int p) {
return t[p].link;
}
int len(int p) {
return t[p].len;
}
int size() {
return cnt;
}
} sam;
int h[NN],e[NN],nt[NN],idx;
void add(int a,int b){
e[++idx]=b;
nt[idx]=h[a];
h[a]=idx;
}
ll res=0;
void dfs(int u){
for(int i=h[u];i!=-1;i=nt[i]){
dfs(e[i]);
sam.t[u].sz+=sam.t[e[i]].sz;
}
if(sam.t[u].sz!=1){
// cerr<<sam.t[u].sz<<" "<<sam.t[u].len<<endl;
res=max(res,1ll*sam.t[u].sz*sam.t[u].len);
// cerr<<res<<endl;
}
}
void solve()
{
memset(h,-1,sizeof h);
string str;
cin>>str;
int n=str.size();
vector<int>p(n+1);
p[0]=1;
for(int i=0;i<n;++i){
// cerr<<str[i]<<endl;
p[i+1]=sam.extend(p[i],str[i]);
// cerr<<p[i+1]<<" "<<sam.t[p[i+1]].nxt[str[i]]<<" "<<sam.t[p[i+1]].len<<" "<<sam.t[p[i+1]].link<<endl;
// cerr<<"cnt"<<sam.cnt<<endl;
}

for(int i=2;i<sam.size();++i){
add(sam.link(i),i);
}
dfs(1);
cout<<res<<endl;
}
int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
cout.tie(0);
int T=1;
// cin>>T;
while(T--) solve();
return 0;
}