讓同一個 SQL 變快
寫不同的 SQL
在 psql 下輸入
CREATE DATABASE performance_test;
在 terminal 下輸入
code /usr/local/var/postgres/postgresql.conf
加入以下內容
shared_preload_libraries = 'pg_stat_statements' # (change requires restart)
pg_stat_statements.max = 1000
pg_stat_statements.track = all
brew services restart postgresql
查看可安裝的 extension,在 psql 下輸入:
\c performance_test
table pg_available_extensions;
查看目前已安裝的 extension
\dx
安裝 pg_stat_statements
CREATE EXTENSION pg_stat_statements;
測試安裝結果
SELECT * FROM pg_stat_statements LIMIT 1;
http://ching119.blogspot.com/2012/06/postgresql-query-plan-bitmap-heap-scan.html
https://kknews.cc/code/gz543le.html
show random_page_cost;
set random_page_cost=1;
建立函數
CREATE FUNCTION random_string(
IN string_length INTEGER,
IN possible_chars TEXT DEFAULT 'abcdefghijklmnopqrstuvwxyz'
)
RETURNS text
LANGUAGE plpgsql
AS $$
DECLARE
output TEXT = '';
i INT4;
pos INT4;
BEGIN
FOR i IN 1..string_length LOOP
pos := 1 + CAST( random() * ( LENGTH(possible_chars) - 1) AS INT4 );
output := output || substr(possible_chars, pos, 1);
END LOOP;
RETURN output;
END;
$$;
建立資料表
CREATE TABLE users
(
id bigint PRIMARY KEY,
name text NOT NULL,
birthday timestamp NOT NULL
);
寫入隨機資料
INSERT INTO users (id, name, birthday)
SELECT
id,
INITCAP(random_string(10)) as name,
now() + random() * interval '-50 year' as birthday
FROM generate_series(1, 10000) id;
explain select * from users;
輸出:
QUERY PLAN
------------------------------------------------------------
Seq Scan on users (cost=0.00..174.00 rows=10000 width=27)
(1 row)
Seq Scan:每一筆資料循序看過一遍
估計啟動成本:0.00
估計總成本:174.00
估計資料列數量:10000 筆
估計一列資料的平均大小:27 bytes
成本的預設單位:以對磁碟頁面讀取1次的成本為基本單位
explain select * from users where id > 9527;
QUERY PLAN
----------------------------------------------------------------------------
Index Scan using users_pkey on users (cost=0.29..23.56 rows=473 width=27)
Index Cond: (id > 9527)
(2 rows)
使用 users_pkey 這個 index 進行 Index Scan
explain select * from users where id > 9527 and birthday < now();
QUERY PLAN
----------------------------------------------------------------------------
Index Scan using users_pkey on users (cost=0.29..25.93 rows=473 width=27)
Index Cond: (id > 9527)
Filter: (birthday < now())
(3 rows)
在 index 找到的東西裡面,每個都做一次 birthday < now() 的過濾檢查
explain select * from users where id < 9527 and birthday < now();
QUERY PLAN
-----------------------------------------------------------
Seq Scan on users (cost=0.00..249.00 rows=9525 width=27)
Filter: ((id < 9527) AND (birthday < now()))
(2 rows)
大概是因為察覺到資料量過大,直接放棄使用 index
CREATE INDEX index_users_name ON users(name);
CREATE INDEX index_users_birthday ON users(birthday);
explain select * from users where id < 9527 and birthday < now() + '-30 years';
QUERY PLAN
---------------------------------------------------------------------------------------
Bitmap Heap Scan on users (cost=83.58..238.44 rows=3851 width=27)
Recheck Cond: (birthday < (now() + '-30 years'::interval))
Filter: (id < 9527)
-> Bitmap Index Scan on index_users_birthday (cost=0.00..82.61 rows=4043 width=0)
Index Cond: (birthday < (now() + '-30 years'::interval))
(5 rows)
Bitmap Index Scan: 看索引 index_users_birthday 找出所有滿足日期條件的紀錄的參考
Bitmap Heap Scan: 拿參考的東西取出 users 表格當中的東西,在過程中加入篩選 id
explain select * from users where id > 4527 and birthday < now() + '-30 years' and name < 'G';
QUERY PLAN
-----------------------------------------------------------------------------------
Bitmap Heap Scan on users (cost=57.65..183.38 rows=509 width=27)
Recheck Cond: (name < 'G'::text)
Filter: ((id > 4527) AND (birthday < (now() + '-30 years'::interval)))
-> Bitmap Index Scan on index_users_name (cost=0.00..57.53 rows=2299 width=0)
Index Cond: (name < 'G'::text)
(5 rows)
Query Planner 會根據自己對於資料分布的了解來選擇最有效率的索引,無法同時使用兩個索引
CREATE TABLE follows
(
id bigint PRIMARY KEY,
user_id bigint NOT NULL,
follower_id bigint NOT NULL,
CONSTRAINT fk_user_id FOREIGN KEY (user_id) REFERENCES users (id),
CONSTRAINT fk_follower_id FOREIGN KEY (follower_id) REFERENCES users (id)
);
寫入隨機資料
INSERT INTO follows (id, user_id, follower_id)
SELECT
id,
floor(random()*10000)+1 as user_id,
floor(random()*10000)+1 as follower_id
FROM generate_series(1, 1000000) id;
explain select * from follows where user_id = 5 and follower_id = 12;
QUERY PLAN
---------------------------------------------------------------------------
Gather (cost=1000.00..13620.10 rows=1 width=24)
Workers Planned: 2
-> Parallel Seq Scan on follows (cost=0.00..12620.00 rows=1 width=24)
Filter: ((user_id = 5) AND (follower_id = 12))
(4 rows)
有兩個 worker 同時在對 follows 做 Seq Scan
CREATE INDEX index_follows_user_id ON follows(user_id);
CREATE INDEX index_follows_follower_id ON follows(follower_id);
CREATE INDEX index_follows_user_id_and_follower_id ON follows(user_id, follower_id);
explain select * from follows where user_id = 5 and follower_id = 12;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Index Scan using index_follows_user_id_and_follower_id on follows (cost=0.42..8.45 rows=1 width=24)
Index Cond: ((user_id = 5) AND (follower_id = 12))
(2 rows)
無法對使用多個 index 但是可以使用一個多欄位 index
explain analyze
select u.*, f.*
from users u
join follows on u.id = follows.user_id
join users f on f.id = follows.follower_id
where u.birthday > now() + '-10 years'
and f.birthday < now() + '-40 years'
order by u.id, f.id
limit 100
;
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=1.00..238.01 rows=100 width=54) (actual time=8.223..54.228 rows=100 loops=1)
-> Nested Loop (cost=1.00..91915.80 rows=38781 width=54) (actual time=8.221..54.198 rows=100 loops=1)
-> Merge Join (cost=0.71..29926.52 rows=199900 width=35) (actual time=5.478..29.926 rows=527 loops=1)
Merge Cond: (follows.user_id = u.id)
-> Index Only Scan using index_follows_user_id_and_follower_id on follows (cost=0.42..25223.42 rows=1000000 width=16) (actual time=0.689..27.090 rows=3768 loops=1)
Heap Fetches: 3768
-> Index Only Scan using index_users_id_name_birthday on users u (cost=0.29..199.09 rows=1999 width=27) (actual time=2.113..2.121 rows=6 loops=1)
Index Cond: (birthday > (now() + '-10 years'::interval))
Heap Fetches: 6
-> Index Scan using users_pkey on users f (cost=0.29..0.31 rows=1 width=27) (actual time=0.045..0.045 rows=0 loops=527)
Index Cond: (id = follows.follower_id)
Filter: (birthday < (now() + '-40 years'::interval))
Rows Removed by Filter: 1
Planning Time: 16.823 ms
Execution Time: 54.305 ms
explain analyze
select u.*, f.*
from users u
join follows on u.id = follows.user_id
join users f on f.id = follows.follower_id
where u.birthday > now() + '-10 years'
and f.birthday < now() + '-40 years'
order by u.birthday, f.birthday
limit 100
;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=16344.37..16344.62 rows=100 width=54) (actual time=257.170..257.191 rows=100 loops=1)
-> Sort (cost=16344.37..16441.32 rows=38781 width=54) (actual time=257.169..257.176 rows=100 loops=1)
Sort Key: u.birthday, f.birthday
Sort Method: top-N heapsort Memory: 48kB
-> Hash Join (cost=140.67..14862.19 rows=38781 width=54) (actual time=3.456..242.331 rows=38543 loops=1)
Hash Cond: (follows.user_id = u.id)
-> Nested Loop (cost=0.42..14212.48 rows=194000 width=35) (actual time=0.058..193.003 rows=193201 loops=1)
-> Seq Scan on users f (cost=0.00..249.00 rows=1940 width=27) (actual time=0.039..4.795 rows=1938 loops=1)
Filter: (birthday < (now() + '-40 years'::interval))
Rows Removed by Filter: 8062
-> Index Scan using index_follows_follower_id on follows (cost=0.42..6.20 rows=100 width=16) (actual time=0.005..0.079 rows=100 loops=1938)
Index Cond: (follower_id = f.id)
-> Hash (cost=115.26..115.26 rows=1999 width=27) (actual time=3.382..3.382 rows=1998 loops=1)
Buckets: 2048 Batches: 1 Memory Usage: 141kB
-> Index Scan using index_users_birthday on users u (cost=0.29..115.26 rows=1999 width=27) (actual time=0.017..1.435 rows=1998 loops=1)
Index Cond: (birthday > (now() + '-10 years'::interval))
Planning Time: 1.545 ms
Execution Time: 257.267 ms
explain analyze
select u.*, f.*
from users u
join follows on u.id = follows.user_id
join users f on f.id = follows.follower_id
where u.birthday > now() + '-10 years'
and f.birthday < now() + '-40 years'
order by u.birthday, f.birthday
limit 100
offset 10000
;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=17660.43..17660.68 rows=100 width=54) (actual time=335.162..335.209 rows=100 loops=1)
-> Sort (cost=17635.43..17732.38 rows=38781 width=54) (actual time=332.962..334.485 rows=10100 loops=1)
Sort Key: u.birthday, f.birthday
Sort Method: top-N heapsort Memory: 2985kB
-> Hash Join (cost=140.67..14862.19 rows=38781 width=54) (actual time=2.420..298.591 rows=38543 loops=1)
Hash Cond: (follows.user_id = u.id)
-> Nested Loop (cost=0.42..14212.48 rows=194000 width=35) (actual time=0.020..244.729 rows=193201 loops=1)
-> Seq Scan on users f (cost=0.00..249.00 rows=1940 width=27) (actual time=0.012..9.179 rows=1938 loops=1)
Filter: (birthday < (now() + '-40 years'::interval))
Rows Removed by Filter: 8062
-> Index Scan using index_follows_follower_id on follows (cost=0.42..6.20 rows=100 width=16) (actual time=0.006..0.093 rows=100 loops=1938)
Index Cond: (follower_id = f.id)
-> Hash (cost=115.26..115.26 rows=1999 width=27) (actual time=2.386..2.386 rows=1998 loops=1)
Buckets: 2048 Batches: 1 Memory Usage: 141kB
-> Index Scan using index_users_birthday on users u (cost=0.29..115.26 rows=1999 width=27) (actual time=0.019..2.018 rows=1998 loops=1)
Index Cond: (birthday > (now() + '-10 years'::interval))
Planning Time: 0.468 ms
Execution Time: 335.270 ms
create materialized view mv_user_and_follower as
select
u.id user_id,
u.name user_name,
u.birthday user_birthday,
f.id follower_id,
f.name follower_name,
f.birthday follower_birthday
from users u
join follows on u.id = follows.user_id
join users f on f.id = follows.follower_id
EXPLAIN ANALYZE
select *
from mv_user_and_follower
where user_birthday > now() + '-10 years'
and follower_birthday < now() + '-40 years'
order by user_birthday, follower_birthday
limit 100
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=37801.50..37801.75 rows=100 width=54) (actual time=407.964..407.982 rows=100 loops=1)
-> Sort (cost=37801.50..37895.53 rows=37612 width=54) (actual time=407.963..407.969 rows=100 loops=1)
Sort Key: user_birthday, follower_birthday
Sort Method: top-N heapsort Memory: 47kB
-> Seq Scan on mv_user_and_follower (cost=0.00..36364.00 rows=37612 width=54) (actual time=0.042..399.039 rows=38543 loops=1)
Filter: ((user_birthday > (now() + '-10 years'::interval)) AND (follower_birthday < (now() + '-40 years'::interval)))
Rows Removed by Filter: 961457
Planning Time: 0.081 ms
Execution Time: 408.005 ms
變成單純的 Seq Scan
EXPLAIN ANALYZE
select *
from mv_user_and_follower
where user_birthday > now() + '-10 years'
and follower_birthday < now() + '-40 years'
order by user_birthday, follower_birthday
limit 100
offset 10000
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=39078.65..39078.90 rows=100 width=54) (actual time=426.145..426.173 rows=100 loops=1)
-> Sort (cost=39053.65..39147.68 rows=37612 width=54) (actual time=423.449..425.439 rows=10100 loops=1)
Sort Key: user_birthday, follower_birthday
Sort Method: top-N heapsort Memory: 2994kB
-> Seq Scan on mv_user_and_follower (cost=0.00..36364.00 rows=37612 width=54) (actual time=0.036..402.557 rows=38543 loops=1)
Filter: ((user_birthday > (now() + '-10 years'::interval)) AND (follower_birthday < (now() + '-40 years'::interval)))
Rows Removed by Filter: 961457
Planning Time: 0.088 ms
Execution Time: 426.209 ms
沒有變慢多少
CREATE INDEX index_mv_user_and_follower_birthday ON mv_user_and_follower(user_birthday, follower_birthday);
EXPLAIN ANALYZE
select *
from mv_user_and_follower
where user_birthday > now() + '-10 years'
and follower_birthday < now() + '-40 years'
order by user_birthday, follower_birthday
limit 100
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.43..38.88 rows=100 width=54) (actual time=0.059..0.202 rows=100 loops=1)
-> Index Scan using index_mv_user_and_follower_birthday on mv_user_and_follower (cost=0.43..14458.62 rows=37612 width=54) (actual time=0.058..0.188 rows=100 loops=1)
Index Cond: ((user_birthday > (now() + '-10 years'::interval)) AND (follower_birthday < (now() + '-40 years'::interval)))
Planning Time: 0.101 ms
Execution Time: 0.226 ms
太神啦~
再測個 offset
EXPLAIN ANALYZE
select *
from mv_user_and_follower
where user_birthday > now() + '-10 years'
and follower_birthday < now() + '-40 years'
order by user_birthday, follower_birthday
limit 100
offset 10000
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=3844.47..3882.91 rows=100 width=54) (actual time=19.788..19.942 rows=100 loops=1)
-> Index Scan using index_mv_user_and_follower_birthday on mv_user_and_follower (cost=0.43..14458.62 rows=37612 width=54) (actual time=0.057..19.255 rows=10100 loops=1)
Index Cond: ((user_birthday > (now() + '-10 years'::interval)) AND (follower_birthday < (now() + '-40 years'::interval)))
Planning Time: 0.104 ms
Execution Time: 19.968 ms
快到起飛
create materialized view mv_user_and_follower2 as
select
u.id user_id,
u.name user_name,
u.birthday user_birthday,
cast(floor(EXTRACT(DAY FROM now()-u.birthday)/365/5)*5 as integer) user_age,
f.id follower_id,
f.name follower_name,
f.birthday follower_birthday,
cast(floor(EXTRACT(DAY FROM now()-f.birthday)/365/5)*5 as integer) follower_age
from users u
join follows on u.id = follows.user_id
join users f on f.id = follows.follower_id;
\d mv_user_and_follower2;
CREATE INDEX index_mv_user_and_follower2_birthday ON mv_user_and_follower2(user_age, follower_age, user_birthday, follower_birthday);
確認一下內容
select user_age, count(0) from mv_user_and_follower2 group by 1;
user_age | count
----------+--------
0 | 96911
5 | 102467
10 | 98203
15 | 98513
20 | 90505
25 | 108177
30 | 108161
35 | 103045
40 | 96461
45 | 97057
50 | 500
EXPLAIN ANALYZE
select *
from mv_user_and_follower2
where user_age in (0,5)
and follower_age in (40,45,50)
order by user_birthday, follower_birthday
limit 100
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=15691.29..15691.54 rows=100 width=62) (actual time=53.577..53.598 rows=100 loops=1)
-> Sort (cost=15691.29..15787.92 rows=38654 width=62) (actual time=53.576..53.583 rows=100 loops=1)
Sort Key: user_birthday, follower_birthday
Sort Method: top-N heapsort Memory: 39kB
-> Index Scan using index_mv_user_and_follower2_birthday on mv_user_and_follower2 (cost=0.42..14213.96 rows=38654 width=62) (actual time=0.018..44.697 rows=38644 loops=1)
Index Cond: ((user_age = ANY ('{0,5}'::integer[])) AND (follower_age = ANY ('{40,45,50}'::integer[])))
Planning Time: 0.113 ms
Execution Time: 53.623 ms
看起來是變得很單純,加個 offset 看看:
EXPLAIN ANALYZE
select *
from mv_user_and_follower2
where user_age in (0,5)
and follower_age in (40,45,50)
order by user_birthday, follower_birthday
limit 100
offset 10000
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=17003.12..17003.37 rows=100 width=62) (actual time=66.240..66.262 rows=100 loops=1)
-> Sort (cost=16978.12..17074.76 rows=38654 width=62) (actual time=64.764..65.628 rows=10100 loops=1)
Sort Key: user_birthday, follower_birthday
Sort Method: top-N heapsort Memory: 2819kB
-> Index Scan using index_mv_user_and_follower2_birthday on mv_user_and_follower2 (cost=0.42..14213.96 rows=38654 width=62) (actual time=0.019..34.210 rows=38644 loops=1)
Index Cond: ((user_age = ANY ('{0,5}'::integer[])) AND (follower_age = ANY ('{40,45,50}'::integer[])))
Planning Time: 0.103 ms
Execution Time: 66.305 ms
數據 https://docs.google.com/spreadsheets/d/1ekMoE8DEugwkH-U6xCtAg3J5Yjm4EX8eFLnZ9IljaYY/edit#gid=274134963 結論 建議使用 project_id index 搭配 ILIKE 或 lower LIKE,移除 trgm 或 bigm 直到大型專案的客戶有反應為止。 project_id 的 index 對大部分的粉專有效 trgm 或 bigm 只對超大型粉專有幫助 當搜尋的內容是英文 3 個字時:trgm 有效
Jul 22, 2020結論 應該使用 bigm bigm 在1~2個英文字母的搜尋沒有效能改善,或者說即使有改善還是要跑很久,因為符合搜尋條件的資料量過大。 bigm 在1~2個中文字的搜尋有 2~100 倍的效能改善。 bigm 在 3 個字以上的耗時是 trgm 的 1.5 倍,但還是很快。 trgm 和 bigm 一起用當然是最快的,但是吃寫入和儲存資源 不應該使用 ILIKE 或 lower LIKE
Jul 20, 2020or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up