# Suffix Array
把給每個後綴字串排序,得到開頭位置,可用來求子字串,因為已經排序好了,可用二分搜來找答案
可把字串分為2^x^
x=0時代表只有一個字元要排序,然後離散化,求出rank
x=1(2^1^=2)時,可把子字串分為兩半,長度各為(2^x-1^=2^0^),因此可用上一次的rank來排序
x=2 由兩邊的x=1的rank來排序
x=3 由兩邊的x=2的rank來排序
做log2(n)次
```cpp=
#pragma GCC optimize("Ofast,unroll-loops")
#include <bits/stdc++.h>
#define int long long
using namespace std;
#define pii pair<int,int>
#define F first
#define S second
#define pb push_back
#define all(a) (a).begin(),(a).end()
#define mem(a) memset(a,0,sizeof(a))
#define binary_search
#define inf LLONG_MAX
#define mod 1000000007
#define MAXN 200005
int N=1,n,m,x,k,ans;
//-----------------------------------
string s,target;
vector<int> tmp,Rank,sa;
//[total,lens of 2^x's rank]=[left half,lens of 2^(x-1)'s rank][right half,lens of 2^(x-1)'s rank]
pii info(int i){ //get the rank of [i,i+k-1][i+k,i+2k-1]
if(i+k>=n) return make_pair(Rank[i],-1);
//out of bound so it can't bigger then anyone else which has two halves of susbstring
return make_pair(Rank[i],Rank[i+k]); //two halves of rank
}
bool cmp(int a,int b){ //sort by two halves of rank
return info(a)<info(b);
}
signed main(){
cin.sync_with_stdio(0),cin.tie(0);
getline(cin,s);
n=s.size();
//initialization
tmp.resize(n),Rank.resize(n),sa.resize(n);
for(int i=0;i<n;i++) sa[i]=i;
//sort substrings of lens 2^0=1
sort(all(sa),[&](int a,int b){
return s[a]<s[b];
});
//Discretization
Rank[sa[0]]=0; //init
for(int i=1;i<n;i++){
Rank[sa[i]]=Rank[sa[i-1]]+(s[sa[i]]!=s[sa[i-1]]);
//if s[sa[i]]!=s[sa[i-1]] represents different char,needs to add one to the rank
}
for(k=1;k<=n;k*=2){ //k=2^x,x>0
sort(all(sa),cmp);
//Discretization
tmp[sa[0]]=0; //init
for(int i=1;i<n;i++){
tmp[sa[i]]=tmp[sa[i-1]]+(info(sa[i])!=info(sa[i-1]));
//the same logic with line 49
}
Rank=tmp;
}
for(auto i:sa) cout << i << '\n';
}
```
## substack2
求子字串
由上述方法可求出後綴字串由小到大的起始位置,所以可由二分搜來找target字串
方法如下:
```cpp=
//m=target.size()
int l=0,r=n-1;
while(l<=r){
int mid=(l+r)>>1;
if(s.substr(sa[mid],m)==target){
Find=true;
break;
}
else if(s.substr(sa[mid],m)<target) l=mid+1;
else r=mid-1;
}
```