矩陣乘法與應用

# 矩陣乘法與應用 by balbit --- ## 矩陣是什麼? 可以想成向量轉換 --- ## 矩陣乘法定義相當於合併兩個轉換,變成一個新的 $$ A \cdot B = C $$ $$ C_i,_j = \sum A_i,_k \cdot B_k,_j $$ 很難讓矩陣乘法比 $O(k^3)$ 快。 ---- ## 有用的知識 $$ ( A \cdot B ) \cdot C = A \cdot (B \cdot C) $$ --- ## 實作風格 ---- ### 1. 學黃大師乖乖包struct: ```c template <typename T> struct matrix{ int row,col; T val[2][2]; matrix():row(2),col(2){} matrix operator *(const matrix& o) const { matrix res; rep(row) rep1(j,o.col) rep1(k,col) res.val[i][j]+=val[i][k]*o.val[k][j]; return res; } }; ``` ---- ### 2. 學我亂寫 ```c typedef array<ll, 100> Vec; typedef array<Vec, 100> Mat; ``` --- ## 快速幂如何快速求 $a ^ b \% P$ ? ---- 分 $b$ 為 $2^{k_1} + 2^{k_2} + ...$ 以 $a^{2^k} \cdot a^{2^k} = a^{2^{k+1}}$ 求出所有 $a^{2^x}$ 及有$O(\log_2(b))$ 次乘法 --- ## 中速線性遞迴廣義fibonacci 定義 $f(x) = a_1 * f(x-1) + ... + a_k * f(x-k)$ 給定 $f(0), f(1), ..., f(k-1)$, 求 $f(n)$ $n \leq 10^{18}$ $k \leq 100$ 目標： $O(k^3 \log n)$ ---- ## 作法： $\begin{bmatrix} f_{x-k} & ... & f_{x-2} & f_{x-1} \\ \end{bmatrix}$ 到 $\begin{bmatrix} f_{x-k+1} & ... & f_{x-1} & f_{x} \\ \end{bmatrix}$ 只是一個線性轉換以 $k \times k$ 的矩陣做矩陣快速冪即可。 ---- ## 存在快速線性遞迴 $O(k^2 \log n)$ 太毒了 X_X $O(k \log k \log n)$ 我不會 --- ## 例題1: USACO COWBASIC 給你一個有定義和迴圈的程式碼, 求最後一個變數模$10^9+7$的值 loop 次數 $\leq 10^5$ 行數 $\leq 100$ 行長度 $\leq 350$ 定義為\[變數名] = 許多變數的和 ---- Ex: ``` n = 1 nsq = 1 100000 MOO { 100000 MOO { nsq = ( nsq ) + ( ( n ) + ( ( n ) + ( 1 ) ) ) n = ( n ) + ( 1 ) } } RETURN nsq ``` ``` 4761 ``` 提示:可以忽略括弧 ---- ## Ex2: ``` x = 1 10 MOO { x = ( x ) + ( x ) } RETURN x ``` ``` 1024 ``` 它要包幾層迴圈都可以 ---- ### 觀察: 將單位 1 設為一個變數每行定義 \[變數名] = 許多變數的和其實是一個向量轉換每塊迴圈即是矩陣幂 $100^3 * \log_2(100000)$ 做掉 ---- ```c #include <bits/stdc++.h> #include <ext/pb_ds/assoc_container.hpp> using namespace std; #define ll long long #define pii pair<int, int> #define ull unsigned ll #define f first #define s second #define FOR(i,a,b) for (int i=(a); i<(b); ++i) #define REP(i,n) FOR(i,0,n) #define REP1(i,n) FOR(i,1,n+1) #define RREP(i,n) for (int i=(n-1); i>=0; --i) #define ALL(x) x.begin(),x.end() #define SZ(x) (int)x.size() #define SQ(x) (x)*(x) #define MN(a,b) a = min(a,(__typeof__(a))(b)) #define MX(a,b) a = max(a,(__typeof__(a))(b)) #define pb push_back #define SORT_UNIQUE(c) (sort(c.begin(),c.end()), c.resize(distance(c.begin(),unique(c.begin(),c.end())))) #ifdef BALBIT #define IOS() #define bug(...) fprintf(stderr,"#%d (%s) = ",__LINE__,#__VA_ARGS__),_do(__VA_ARGS__); template<typename T> void _do(T &&x){cerr<<x<<endl;} template<typename T, typename ...S> void _do(T &&x, S &&...y){cerr<<x<<", ";_do(y...);} #else #define IOS() ios_base::sync_with_stdio(0);cin.tie(0); #define endl '\n' #define bug(...) #endif const int iinf = 1<<29; const ll inf = 1ll<<60; const ll mod = 1e9+7; void GG(){cout<<"-1\n"; exit(0);} ll mpow(ll a, ll n, ll mo = mod){ // a^n % mod ll re=1; while (n>0){ if (n&1) re = re*a %mo; a = a*a %mo; n>>=1; } return re; } ll inv (ll b, ll mo = mod){ if (b==1) return b; return (mo-mo/b) * inv(mo%b) % mo; } const int maxn = 1e5+5; typedef array<ll, 100> Vec; typedef array<Vec, 100> Mat; map<string, int> ID; int IT = 1; void mul(Mat a, Mat b, Mat &c) { for (int i = 0; i<IT; ++i) { for (int j = 0; j<IT; ++j) { c[i][j] = 0; for (int k = 0; k<IT; ++k) { c[i][j] += a[i][k] * b[k][j]; c[i][j] %= mod; } } } } void pw(Mat & c, int T) { --T; Mat a = c; for(int i = 0; (1<<i) <= T; ++i) { if (T & (1<<i)) { mul(c,a,c); } mul(a,a,a); } } void addvec(Vec &a, Vec b) { for (int i = 0; i<IT; ++i) { a[i] += b[i]; if (a[i] >= mod) a[i] -= mod; } } Mat go() { string s; Mat re; for (int i = 0; i<100; ++i) for (int j = 0; j<100; ++j) re[i][j] = i==j; while (cin>>s) { if (isalpha(s[0])) { if (s == "RETURN") { string who; cin>>who; cout<<re[ID[who]][0]<<endl; for (auto & p : ID) bug(p.f, p.s); exit(0); }else{ int it = -1; if (ID.count(s)) { it = ID[s]; }else{ it = ID[s] = IT++; } string foo; cin>>foo; assert(foo == "="); cin.ignore(); string tmps; getline(cin, tmps); stringstream ss; ss<<tmps; string toto; Vec newv; newv.fill(0); while (ss >> toto) { bug(toto); if (toto != "(" && toto != "+" && toto != ")") { if (isalpha(toto[0])) { addvec (newv, re[ID[toto]]); }else{ (newv[0] += stoi(toto)) %= mod; } } } re[it] = newv; } } else if (s == "}") return re; else { string foo; cin>>foo; assert(foo == "MOO"); cin>>foo; assert(foo == "{"); Mat dm = go(); pw(dm,stoi(s)); mul(dm, re, re); } } } signed main(){ IOS(); #ifndef BALBIT freopen("cowbasic.in", "r", stdin); freopen("cowbasic.out", "w", stdout); #endif // BALBIT go(); } /* x = 1 10 MOO { x = ( x ) + ( x ) } RETURN x */ ``` --- ### 例題 2: AGC13E "Placing Squares" 你有一個長度為 $n \leq 10^9$ 的長條，其中 $m \leq 10^5$ 個點被標為「特殊點」。考慮所有在一個長條上放正方形的方式。一個放法合法的條件為： + 方形底邊貼長條 + 所有格子被蓋住一次 + 所有正方形的角落在整數點上 + 沒有任何正方形角落在特殊點上對於每個這種放法，將他的正方形面積乘起來。求所有放法的積的和。 ---- ---- ## 圖 ![](https://i.imgur.com/GP8NbWl.jpg =200x400) . ---- ## 面積的積... 好怪？想想如何轉換! ---- ---- ## 東西乘起來明顯是組合！新題序：問有幾種方式在長條的格上放黑球and/or白球，和點($x \neq 0$，$x$不為特殊點)上放分隔器使任兩個分隔器中有恰好黑白球各一顆。 ---- 從左邊掃到右邊看，狀態可以分成四種：(沒球，只有黑球，只有白球，都有)。以每一格當一個轉換矩陣想！每一格的矩陣考慮自己上方的格子和以右的分隔器。發現需要一種正常（可以用分隔器）轉換和特殊（不可用分隔器）轉換。 ---- ## 開心推矩陣！ $a$ = 無, $b$ = 黑, $c$ = 白, $d$ = 黑白可以分割： $\begin{bmatrix} a & b & c & d \\ \end{bmatrix} \cdot \begin{bmatrix} 2 & 1 & 1 & 1 \\ 1 & 1 & 0 & 1 \\ 1 & 0 & 1 & 1 \\ 1 & 0 & 0 & 1 \\ \end{bmatrix} = \begin{bmatrix} a' & b' & c' & d' \\ \end{bmatrix}$ ---- ### 不可分割：自己推 >_< 長得很像 ---- 對於兩個相鄰的「特殊點」，如果距離為d，則做 d-1次正常轉移和一次特殊轉移。 $O(m \cdot \log n \cdot 4^3)$ ```c #include <bits/stdc++.h> using namespace std; #define ll long long #define pii pair<int, int> #define ull unsigned ll #define f first #define s second #define ALL(x) x.begin(),x.end() #define SZ(x) (int)x.size() #define SQ(x) (x)*(x) #define MN(a,b) a = min(a,(__typeof__(a))(b)) #define MX(a,b) a = max(a,(__typeof__(a))(b)) #define pb push_back #define REP(i,n) for (int i = 0; i<n; ++i) #define SORT_UNIQUE(c) (sort(c.begin(),c.end()), c.resize(distance(c.begin(),unique(c.begin(),c.end())))) #ifdef BALBIT #define IOS() #define bug(...) fprintf(stderr,"#%d (%s) = ",__LINE__,#__VA_ARGS__),_do(__VA_ARGS__); template<typename T> void _do(T &&x){cerr<<x<<endl;} template<typename T, typename ...S> void _do(T &&x, S &&...y){cerr<<x<<", ";_do(y...);} #else #define IOS() ios_base::sync_with_stdio(0);cin.tie(0); #define endl '\n' #define bug(...) #endif const int iinf = 1e9+10; const ll inf = 1ll<<60; const ll mod = 1e9+7 ; void GG(){cout<<"0\n"; exit(0);} ll mpow(ll a, ll n, ll mo = mod){ // a^n % mod ll re=1; while (n>0){ if (n&1) re = re*a %mo; a = a*a %mo; n>>=1; } return re; } ll inv (ll b, ll mo = mod){ if (b==1) return b; return (mo-mo/b) * inv(mo%b,mo) % mo; } const int maxn = 1e6+5; typedef array<ll, 4> Vec; typedef array<Vec, 4> Mat; Mat T1 = {{{2,1,1,1},{1,1,0,1},{1,0,1,1},{1,0,0,1}}}; Mat T2 = {{{1,1,1,1},{0,1,0,1},{0,0,1,1},{0,0,0,1}}}; void mul(Mat a, Mat b, Mat &c) { REP(i,4) REP(j,4) { c[i][j] = 0; REP(k,4) { c[i][j] += a[i][k] * b[k][j]; } c[i][j] %= mod; } } Mat trans(int k) { Mat re; Mat a = T1; REP(i,4) REP(j,4) re[i][j] = i==j; --k; while ( k > 0) { if (k & 1) mul(re,a,re); mul(a,a,a); k/=2; } mul(re,T2, re); return re; } void mul(Vec a, Mat b, Vec &c) { REP(j,4) { c[j] = 0; REP(k,4) { c[j] += a[k] * b[k][j]; } c[j] %= mod; } } signed main(){ IOS(); int n,m; cin>>n>>m; Vec re = {1,0,0,0}; int prv = 0; REP(i,m) { int x; cin>>x; mul(re, trans(x-prv), re); prv = x; } mul(re, trans(n-prv), re); cout<<re[3]<<endl; } ``` ---- --- ## 例題 3: NOI 2020 day 1 美食家 ![](https://i.imgur.com/8MOup0e.png =400x400) ---- 邊權 <= 5 n <= 50 m <= 501 k <= 200 T <= 10^9 c_i <= 50000 ---- 重新定義乘法: $$ A \cdot B = C \iff $$ $$ C_i,_j = \rm Max (A_i,_k + B_k,_j) $$ 它同樣滿足 $$ ( A \cdot B ) \cdot C = A \cdot (B \cdot C) $$ 所以可以套快速幂 ---- ## 好想用矩陣如果邊相同可以用矩陣轉移如果從時間轉移對每個 $a$ to $b$ 的邊，將轉移矩陣上 $M_a,_b$ 的值設為走完這個邊會得到的值，或 $c_b$ 則可以以矩陣快速冪快速跳過一段時間的轉移。 ---- # 轉換原圖邊權只有到5, 不拆真虧！但要怎麼拆？ ---- ## 好像有點慢 k 次訪問，每次要做 $O((5n)^3) * log_2(T)$ 次操作。估計一下： $200 \cdot 250^3 \cdot 22 = 6.8 \times 10^{10}$... 一定不會過 ---- ## 怎麼加速？注意矩陣乘法很慢 ($250^3$)，但向量乘矩陣很快 ($250^2$)。預處理完所有 $2^k$ 的矩陣次方後，每次轉換只需要log2(時差)次向量與矩陣乘法。估計一下： $250^3 \cdot 30 + 250^2 \cdot 200 \cdot 22 = 7.4 \times 10^8$... 比剛剛快了100倍，可能可以過！ ---- ## 其實可以過 ```c #include <bits/stdc++.h> #pragma GCC optimize("O3", "unroll-loops") using namespace std; #define ll long long #define pii pair<int, int> #define ull unsigned ll #define f first #define s second #define ALL(x) x.begin(),x.end() #define SZ(x) (int)x.size() #define SQ(x) (x)*(x) #define MN(a,b) a = min(a,(__typeof__(a))(b)) #define MX(a,b) a = max(a,(__typeof__(a))(b)) #define pb push_back #define SORT_UNIQUE(c) (sort(c.begin(),c.end()), c.resize(distance(c.begin(),unique(c.begin(),c.end())))) #ifdef BALBIT #define IOS() #define bug(...) fprintf(stderr,"#%d (%s) = ",__LINE__,#__VA_ARGS__),_do(__VA_ARGS__); template<typename T> void _do(T &&x){cerr<<x<<endl;} template<typename T, typename ...S> void _do(T &&x, S &&...y){cerr<<x<<", ";_do(y...);} #else #define IOS() ios_base::sync_with_stdio(0);cin.tie(0); #define endl '\n' #define bug(...) #endif const int iinf = 1e9+10; const ll inf = 1ll<<60; const ll mod = 1e9+7 ; void GG(){cout<<"0\n"; exit(0);} ll mpow(ll a, ll n, ll mo = mod){ // a^n % mod ll re=1; while (n>0){ if (n&1) re = re*a %mo; a = a*a %mo; n>>=1; } return re; } ll inv (ll b, ll mo = mod){ if (b==1) return b; return (mo-mo/b) * inv(mo%b,mo) % mo; } const int maxn = 1e6+5; const int asz = 50*5; typedef array<ll, asz> Vec; typedef array<Vec, asz> Mat; int n,m,T,k; void mul(Mat & a, Mat & b, Mat &c) { for (int i = 0; i < asz; ++i) { for (int j = 0; j < asz; ++j) { c[i][j] = -10000000000000000ll; for (int k = 0; k<asz; ++k) { c[i][j] = max(c[i][j], a[i][k] + b[k][j]); } } } } void mul2(Vec a, Mat b, Vec &c) { for (int i = 0; i<asz; ++i) { c[i] = -10000000000000000ll; for (int j = 0; j< asz; ++j) { c[i] = max(c[i], a[j] + b[j][i]); } } } Mat M; int Cv[maxn]; Mat p2[31]; signed main(){ IOS(); #ifndef BALBIT freopen("delicacy.in", "r", stdin); freopen("delicacy.out", "w", stdout); #endif // BALBIT cin>>n>>m>>T>>k; for (int i = 0; i<SZ(M); ++i) for (int j = 0; j<SZ(M[0]); ++j) M[i][j] = -10000000000000000ll; for (int i = 0; i<n; ++i) cin>>Cv[i]; for (int i = 0; i<n; ++i) { for (int j = 0; j<4; ++j) M[i*5+j][i*5+j+1] = 0; } for (int i = 0; i<m; ++i) { int a,b,w; cin>>a>>b>>w; --a; --b; MX(M[a*5+w-1][b*5], Cv[b]); } p2[0] = M; for (int i = 1; i<=30; ++i) { mul(p2[i-1], p2[i-1], p2[i]); } Vec tp; fill(ALL(tp), -10000000000000000ll); tp[0] = Cv[0]; vector<pair<int, pii> > ev; ev.pb({T, {0,0}}); for (int i = 0; i<k; ++i) { int tt, x, y; cin>>tt>>x>>y; ev.pb({tt,{x-1,y}}); } sort(ALL(ev)); int pv = 0; for (auto & p : ev) { for (int i = 0; i<=30; ++i) { if ((p.f-pv) & (1<<i)) { mul2(tp, p2[i], tp); } } pv = p.f; tp[p.s.f*5] += p.s.s; } cout<<max(tp[0],-1ll)<<endl; } ``` ``` ``` --- ## 例題 4: 給你一個無向圖, 一開始隨機取一條邊, 走到它隨機連到的一個節點每個點上有一個值$c_i$ 接下來k 步中以平均機率走到相鄰的一個點求路徑上$c_i$的和的期望值 $n \leq 100$ $k \leq 10^{18}$ ---- ### 那如果n到10^5? ---- # 不要矩陣中毒 >_< --- # The End