https://github.com/citusdata/postgresql-topn
https://www.citusdata.com/blog/2018/03/27/topn-for-your-postgres-database/
從 github 下載, 會存成 zip, 解壓縮.
unzip postgresql-topn-master.zip
目前的版本是 2.3.1
topn v2.3.1 (November 27, 2020)
Adds PostgreSQL 13 support
Supports parallel queries and aggregates
make
make install
/bin/sh /usr/local/lib/postgresql/pgxs/src/makefiles/../../config/install-sh -c -d '/usr/local/share/postgresql/extension'
/bin/sh /usr/local/lib/postgresql/pgxs/src/makefiles/../../config/install-sh -c -d '/usr/local/share/postgresql/extension'
/bin/sh /usr/local/lib/postgresql/pgxs/src/makefiles/../../config/install-sh -c -d '/usr/local/lib/postgresql'
/usr/bin/install -c -m 644 .//topn.control '/usr/local/share/postgresql/extension/'
/usr/bin/install -c -m 644 .//topn--2.0.0--2.1.0.sql .//topn--2.1.0--2.2.0.sql .//topn--2.2.0--2.2.1.sql .//topn--2.2.1--2.2.2.sql .//topn--2.2.2--2.3.0.sql .//topn--2.3.0--2.3.1.sql topn--2.0.0.sql topn--2.1.0.sql topn--2.2.0.sql topn--2.2.1.sql topn--2.2.2.sql topn--2.3.0.sql topn--2.3.1.sql '/usr/local/share/postgresql/extension/'
/usr/bin/install -c -m 755 topn.so '/usr/local/lib/postgresql/'
|=> psql -U pagila
psql (13.1)
Type "help" for help.
pagila[pagila]# create extension topn with schema public;
CREATE EXTENSION
Time: 266.651 ms
pagila[pagila]#* commit;
COMMIT
Time: 5.081 ms
pagila[pagila]# \dx topn
List of installed extensions
Name | Version | Schema | Description
------+---------+--------+----------------------
topn | 2.3.1 | public | type for top-n JSONB
(1 row)
pagila[pagila]# \dx+ topn
Objects in extension "topn"
Object description
-------------------------------------------------
function topn(jsonb,integer)
function topn_add(jsonb,text)
function topn_add_agg(text)
function topn_add_trans(internal,text)
function topn_deserialize(bytea,internal)
function topn_pack(internal)
function topn_serialize(internal)
function topn_union(jsonb,jsonb)
function topn_union_agg(jsonb)
function topn_union_internal(internal,internal)
function topn_union_trans(internal,jsonb)
operator +(jsonb,jsonb)
type topn_record
(13 rows)
pagila[pagila]# \df topn*
List of functions
Schema | Name | Result data type | Argument data types | Type
--------+---------------------+-------------------+---------------------+------
public | topn | SETOF topn_record | jsonb, integer | func
public | topn_add | jsonb | jsonb, text | func
public | topn_add_agg | jsonb | text | agg
public | topn_add_trans | internal | internal, text | func
public | topn_deserialize | internal | bytea, internal | func
public | topn_pack | jsonb | internal | func
public | topn_serialize | bytea | internal | func
public | topn_union | jsonb | jsonb, jsonb | func
public | topn_union_agg | jsonb | jsonb | agg
public | topn_union_internal | internal | internal, internal | func
public | topn_union_trans | internal | internal, jsonb | func
(11 rows)
wget http://examples.citusdata.com/customer_reviews_2000.csv.gz
gzip -d customer_reviews_2000.csv.gz
CREATE TABLE customer_reviews
(
customer_id TEXT,
review_date DATE,
review_rating INTEGER,
review_votes INTEGER,
review_helpful_votes INTEGER,
product_id CHAR(10),
product_title TEXT,
product_sales_rank BIGINT,
product_group TEXT,
product_category TEXT,
product_subcategory TEXT,
similar_product_ids CHAR(10)[]
);
\COPY customer_reviews FROM 'customer_reviews_2000.csv' WITH CSV;
commit;
會出現 COPY 1260658 的訊息
-- create a roll-up table to capture most popular products
create table popular_products (
review_date date UNIQUE
, agg_data jsonb
);
-- Create different summaries by grouping top reviews for each date (day, month, year)
insert into popular_products
select review_date
, topn_add_agg(product_id)
from customer_reviews
group by review_date;
commit;
select review_date
, topn(agg_data, 1)
, (topn(agg_data, 1)).*
from popular_products
limit 5;
review_date | topn | item | frequency
-------------+-----------------+------------+-----------
2000-01-16 | (0151004439,15) | 0151004439 | 15
2000-01-06 | (B00000JJCZ,13) | B00000JJCZ | 13
2000-01-30 | (0807281956,10) | 0807281956 | 10
2000-01-05 | (B00000JJCZ,17) | B00000JJCZ | 17
2000-02-05 | (630575070X,17) | 630575070X | 17
(5 rows)
select review_date
, topn(agg_data, 1)
, (topn(agg_data, 1)).*
from popular_products
order by frequency desc
limit 10;
review_date | topn | item | frequency
-------------+------------------+------------+-----------
2000-07-09 | (0786229276,705) | 0786229276 | 705
2000-07-09 | (0807282596,705) | 0807282596 | 705
2000-07-08 | (0939173379,493) | 0939173379 | 493
2000-07-08 | (0807282588,493) | 0807282588 | 493
2000-07-10 | (0807282588,434) | 0807282588 | 434
2000-07-10 | (0786229276,434) | 0786229276 | 434
2000-10-03 | (B00004Z511,243) | B00004Z511 | 243
2000-10-03 | (B00004XONN,243) | B00004XONN | 243
2000-07-11 | (0939173379,179) | 0939173379 | 179
2000-07-11 | (0439139600,179) | 0439139600 | 179
(10 rows)
select review_date
, topn(agg_data, 10)
, (topn(agg_data, 10)).*
from popular_products
order by frequency desc
limit 20;
review_date | topn | item | frequency
-------------+------------------+------------+-----------
2000-07-09 | (0939173379,705) | 0939173379 | 705
2000-07-09 | (0807282588,705) | 0807282588 | 705
2000-07-09 | (0786229276,705) | 0786229276 | 705
2000-07-09 | (0439139597,705) | 0439139597 | 705
2000-07-09 | (0439139600,705) | 0439139600 | 705
2000-07-09 | (0807282596,705) | 0807282596 | 705
2000-07-08 | (0939173379,493) | 0939173379 | 493
2000-07-08 | (0786229276,493) | 0786229276 | 493
2000-07-08 | (0807282596,493) | 0807282596 | 493
2000-07-08 | (0807282588,493) | 0807282588 | 493
2000-07-08 | (0439139597,493) | 0439139597 | 493
2000-07-08 | (0439139600,493) | 0439139600 | 493
2000-07-10 | (0439139600,434) | 0439139600 | 434
2000-07-10 | (0939173379,434) | 0939173379 | 434
2000-07-10 | (0439139597,434) | 0439139597 | 434
2000-07-10 | (0786229276,434) | 0786229276 | 434
2000-07-10 | (0807282596,434) | 0807282596 | 434
2000-07-10 | (0807282588,434) | 0807282588 | 434
2000-10-03 | (B00004XONN,243) | B00004XONN | 243
2000-10-03 | (B00004Z511,243) | B00004Z511 | 243
(20 rows)
-- 使用 topn_union_agg 來聚合數個
select (topn(topn_union_agg(agg_data), 10)).*
from popular_products
where review_date >= date '2000-01-01'
and review_date < date '2000-02-01'
order by 2 desc;
item | frequency
------------+-----------
0375408738 | 217
0375404368 | 217
0345417623 | 217
0375404376 | 217
0807281751 | 204
0939173344 | 204
043936213X | 204
0807286001 | 204
0807281956 | 204
0786222727 | 204
(10 rows)
-- 一年期間 每月的前兩名
select date_trunc('month', review_date)::date as review_month
, (topn(topn_union_agg(agg_data), 2)).*
from popular_products
where review_date >= date '2000-01-01'
and review_date < date '2001-01-01'
group by review_month
order by review_month, frequency;
review_month | item | frequency
--------------+------------+-----------
2000-01-01 | 0375408738 | 217
2000-01-01 | 0345417623 | 217
2000-02-01 | 0440236673 | 291
2000-02-01 | 0375409726 | 291
2000-03-01 | B00004NRPZ | 635
2000-03-01 | B00004NRU3 | 637
2000-04-01 | B0000667J7 | 399
2000-04-01 | B00003CX5P | 399
2000-05-01 | B00004SCX6 | 380
2000-05-01 | B00004T1HK | 380
2000-06-01 | B00004SCX6 | 202
2000-06-01 | B00004TL21 | 259
2000-07-01 | 0786229276 | 2734
2000-07-01 | 0807282596 | 2736
2000-08-01 | 0786229276 | 350
2000-08-01 | 0807282596 | 350
2000-09-01 | B00007CWQI | 238
2000-09-01 | 039914563X | 238
2000-10-01 | B00004XONN | 961
2000-10-01 | B00004Z511 | 962
2000-11-01 | B000050I5N | 343
2000-11-01 | B000050HSL | 343
2000-12-01 | B00005NBAU | 224
2000-12-01 | B00004YS7F | 230
(24 rows)
https://github.com/citusdata/postgresql-topn#config-settings
show topn.number_of_counters;
topn.number_of_counters
-------------------------
1000
set topn.number_of_counters to 100;
commit;
-- Create table to insert summaries.
create table popular_products2 (
review_summary jsonb
, year int not null
, month int not null
);
-- Create different summaries by grouping the reviews according to their year and month.
insert into popular_products2
select topn_add_agg(product_id)
, extract(year from review_date)::int as year
, extract(month from review_date)::int as month
from customer_reviews
group by year, month;
INSERT 0 12
Time: 3113.382 ms (00:03.113)
commit;
https://github.com/citusdata/postgresql-topn#topn_union_aggtopntypecolumn
This is the aggregate for union operation. It merges the JSONB counter lists and returns the final JSONB which stores overall result.
create table overall_result (
merged_summary jsonb
);
insert into overall_result
select topn_union_agg(review_summary)
from popular_products2;
INSERT 0 1
Time: 50.210 ms
select (topn(merged_summary, 20)).*
from overall_result
order by 2 desc, 1;
item | frequency
------------+-----------
0807282596 | 3282
0807282588 | 3278
0439139597 | 3277
0786229276 | 3277
0939173379 | 3276
0439139600 | 3272
B00004NRU3 | 1274
B00004NRPZ | 1254
B00004Z511 | 1181
B00004XONN | 1179
0590353403 | 1134
0807281751 | 1129
0807286001 | 1092
0807281956 | 1034
0786222727 | 1029
043936213X | 981
0939173344 | 974
630575070X | 959
6305750750 | 958
630575067X | 939
(20 rows)
create table daily_populars (
date date
, agg_data jsonb
);
insert into daily_populars
select date_trunc('day', review_date)
, topn_add_agg(product_id)
from customer_reviews
group by 1;
commit;
select date
, topn_union_agg(agg_data) over seven_days
from daily_populars
window seven_days as (order by date asc rows 6 preceding)
order by 1
limit 10;
with t(date, merged7) as (
select date
, topn_union_agg(agg_data) over seven_days
from daily_populars
window seven_days as (order by date asc rows 6 preceding)
order by 1
limit 10
)
select date
, (topn(merged7, 5)).*
from t;
date | item | frequency
------------+------------+-----------
2000-01-01 | 0939173344 | 12
2000-01-01 | 0807286001 | 12
2000-01-01 | 043936213X | 12
2000-01-01 | 0807281956 | 12
2000-01-01 | 0590353403 | 12
2000-01-02 | 0807282324 | 19
2000-01-02 | 0807282316 | 19
2000-01-02 | 0807286028 | 19
2000-01-02 | 0439136369 | 19
2000-01-02 | 0747545111 | 19
2000-01-03 | 043936213X | 27
2000-01-03 | 0590353403 | 27
2000-01-03 | 0807281751 | 27
2000-01-03 | 0786222727 | 27
2000-01-03 | 0939173344 | 27
2000-01-04 | 0345417623 | 39
2000-01-04 | 0375408738 | 39
2000-01-04 | 0375404376 | 39
2000-01-04 | 0375404368 | 39
2000-01-04 | 0939173344 | 35
2000-01-05 | 0375408738 | 47
2000-01-05 | 0375404368 | 47
2000-01-05 | 0375404376 | 47
2000-01-05 | 0345417623 | 47
2000-01-05 | 0786222727 | 40
2000-01-06 | 0375404376 | 54
2000-01-06 | 0375408738 | 54
2000-01-06 | 0375404368 | 54
2000-01-06 | 0345417623 | 54
2000-01-06 | 0939173344 | 46
2000-01-07 | 0375408738 | 60
2000-01-07 | 0345417623 | 60
2000-01-07 | 0375404368 | 60
2000-01-07 | 0375404376 | 60
2000-01-07 | 0807281751 | 52
2000-01-08 | 0375408738 | 63
2000-01-08 | 0375404376 | 63
2000-01-08 | 0375404368 | 63
2000-01-08 | 0345417623 | 63
2000-01-08 | 0060392983 | 52
2000-01-09 | 0345417623 | 76
2000-01-09 | 0375408738 | 76
2000-01-09 | 0375404368 | 76
2000-01-09 | 0375404376 | 76
2000-01-09 | 0060392983 | 66
2000-01-10 | 0060392983 | 85
2000-01-10 | 006103116X | 85
2000-01-10 | 0375404376 | 72
2000-01-10 | 0375404368 | 72
2000-01-10 | 0375408738 | 72
(50 rows)
看到有人發問 https://www.facebook.com/groups/taiwanmysqlusergroup/permalink/1852399401574463/ 建立測試資料 create table t0416 ( id int not null auto_increment primary key , col varchar(12) not null ); insert into t0416 (col) values
Apr 16, 2021列出函數 \df topn* List of functions Schema | Name | Result data type | Argument data types | Type --------+---------------------+-------------------+---------------------+------ public | topn | SETOF topn_record | jsonb, integer | func public | topn_add | jsonb | jsonb, text | func public | topn_add_agg | jsonb | text | agg public | topn_add_trans | internal | internal, text | func public | topn_deserialize | internal | bytea, internal | func
Mar 19, 2021or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up