Try   HackMD

中文模糊搜尋 pg_trgm vs pg_bigm

結論

應該使用 bigm

  • bigm 在1~2個英文字母的搜尋沒有效能改善,或者說即使有改善還是要跑很久,因為符合搜尋條件的資料量過大。
  • bigm 在1~2個中文字的搜尋有 2~100 倍的效能改善。
  • bigm 在 3 個字以上的耗時是 trgm 的 1.5 倍,但還是很快。
  • trgm 和 bigm 一起用當然是最快的,但是吃寫入和儲存資源

不應該使用 ILIKE 或 lower LIKE

  • ILIKE 或 lower LIKE 使得搜尋時間變成 3 倍,不論是否有使用 index

效能測試結果

  • 在以下情形下適用的索引

    • 搜尋一個英文字母
      • 不使用索引,或者即使使用索引也沒有比較快
    • 搜尋三個英文字母以上
      • 使用 trgm 或 bigm 都很快
      • trgm 的耗時是 bigm 的 60%
      • 不區分大小寫時,需要 3 倍耗時
    • 搜尋一個常見中文字
      • bigm 快 2~5倍
    • 搜尋一個罕見中文字
      • bigm 快 10~100倍
    • 搜尋三個中文字
      • trgm 與 bigm 一樣快
  • 在大部分資料都滿足搜尋條件時,資料庫會不使用索引

    • 輸入一個英文字母時通常不使用索引
    • 輸入兩個英文字母時使用 bigm 不見得可以快多少(滿足條件的資料量太多)
    • 只輸入一個中文字,如果是常見字,可能不使用索引
  • bigm 不支援 ILIKE 怎麼辦

    • ILIKE 的耗時是 LIKE 的 3 倍
    • 無索引在 lower LIKE 的耗時與 ILIKE 相同
    • trgm 在 lower LIKE 的耗時與 ILIKE 相同
    • bigm 在 lower LIKE 時會採用 index
  • 建立完 index 後再跑 Analyze,才會保證在正確的時機採用 index

成本測試結果

  • bigm 需要的空間是 trgm 的 65%
  • 建立索引所需要的時間差不多

效能測試圖表

https://docs.google.com/spreadsheets/d/1ekMoE8DEugwkH-U6xCtAg3J5Yjm4EX8eFLnZ9IljaYY/edit?usp=sharing

左邊 4 欄描述一個 SQL 查詢的欄位以及條件
第 5、6 欄紀錄在無 index 的情況下 SQL 的耗時以及執行方式
第 7、8 欄紀錄在只有 trgm index 的情況下 SQL 的耗時以及執行方式
第 9、10 欄紀錄在只有 bigm index 的情況下 SQL 的耗時以及執行方式
第 11、12 欄紀錄在有 trgm + bigm index 的情況下 SQL 的耗時以及執行方式

LIKE

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

ILIKE

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

lower LIKE

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

索引指令與成本

-- 88409 ms
create index customers_display_name_gin on customers using gin (display_name gin_trgm_ops);
-- 240 mb
select pg_size_pretty(pg_relation_size('customers_display_name_gin'));
drop index customers_display_name_gin;

-- 101087 ms
create index customers_display_name_gin_bigm on customers using gin (display_name gin_bigm_ops);
-- 154 mb
select pg_size_pretty(pg_relation_size('customers_display_name_gin_bigm'));
DROP INDEX customers_display_name_gin_bigm;

-- 93872 ms
create index customers_display_name_gin_lower on customers using gin (lower(display_name) gin_trgm_ops);
-- 240 mb
select pg_size_pretty(pg_relation_size('customers_display_name_gin_lower'));
drop index customers_display_name_gin_lower;

-- 98059 ms
create index customers_display_name_gin_bigm_lower on customers using gin (lower(display_name) gin_bigm_ops);
-- 146 mb
select pg_size_pretty(pg_relation_size('customers_display_name_gin_bigm_lower'));
DROP INDEX customers_display_name_gin_bigm_lower;

測試所使用的 SQL

-- Lax
-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%La%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%Lax%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name LIKE '%Lax%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%Lax%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%Lax%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name ILIKE '%Lax%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%lax%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%lax%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where lower(display_name) LIKE '%lax%';

-- D
-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%D%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%D%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name LIKE '%D%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%D%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%D%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name ILIKE '%D%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%d%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%d%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where lower(display_name) LIKE '%d%';


-- Dan
-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%Dan%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%Dan%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name LIKE '%Dan%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%Dan%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%Dan%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name ILIKE '%Dan%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%dan%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%dan%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where lower(display_name) LIKE '%dan%';


-- Daniel
-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%Daniel%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%Daniel%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name LIKE '%Daniel%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%Daniel%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%Daniel%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name ILIKE '%Daniel%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%daniel%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%daniel%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where lower(display_name) LIKE '%daniel%';

-- 恩
-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%恩%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%恩%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name LIKE '%恩%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%恩%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%恩%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name ILIKE '%恩%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%恩%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%恩%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where lower(display_name) LIKE '%恩%';


-- 吳
-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%吳%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%吳%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name LIKE '%吳%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%吳%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%吳%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name ILIKE '%吳%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%吳%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%吳%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where lower(display_name) LIKE '%吳%';


-- 卡
-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%卡%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%卡%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name LIKE '%卡%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%卡%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%卡%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name ILIKE '%卡%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%卡%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%卡%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where lower(display_name) LIKE '%卡%';


-- 盧卡斯
-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%盧卡斯%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name LIKE '%盧卡斯%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name LIKE '%盧卡斯%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%盧卡斯%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where display_name ILIKE '%盧卡斯%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where display_name ILIKE '%盧卡斯%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%盧卡斯%';

-- EXPLAIN
SELECT
	id,
	display_name
FROM
	customers
where lower(display_name) LIKE '%盧卡斯%'
OFFSET 100
limit 100;

-- EXPLAIN
SELECT
	COUNT(0)
FROM
	customers
where lower(display_name) LIKE '%盧卡斯%';