# 105模擬題目 ## 999004 - 使用者個人資料 > userprofile = LOAD '/dataset/999004/999004.tsv' USING PigStorage('\t') AS (userid: chararray, gender:chararray, age: int, country:chararray, registered: chararray); ``` 欄位名稱 使用者id userid chararray 性別 gender chararray 年齡 age int 國家 country chararray 註冊時間 registered chararray ``` ### 會員年齡層分布,第一個欄位請一定依照下方樣式輸出 > userprofile = LOAD '/dataset/999004/999004.tsv' USING PigStorage('\t') AS (userid: chararray, gender:chararray, age: int, country:chararray, registered: chararray); filter_userprofile = FILTER userprofile BY $2 < 20; gp_userprofile = GROUP filter_userprofile ALL; count_userprofile = FOREACH gp_userprofile GENERATE $0,COUNT($1); dump count_userprofile; ``` (all,44) ``` > userprofile = LOAD '/dataset/999004/999004.tsv' USING PigStorage('\t') AS (userid: chararray, gender:chararray, age: int, country:chararray, registered: chararray); filter_userprofile = FILTER userprofile BY $2 > 19 AND NOT $2 > 39; gp_userprofile = GROUP filter_userprofile ALL; count_userprofile = FOREACH gp_userprofile GENERATE $0,COUNT($1); dump count_userprofile; ``` (all,256) ``` > userprofile = LOAD '/dataset/999004/999004.tsv' USING PigStorage('\t') AS (userid: chararray, gender:chararray, age: int, country:chararray, registered: chararray); filter_userprofile = FILTER userprofile BY $2 > 39 AND NOT $2 > 64; gp_userprofile = GROUP filter_userprofile ALL; count_userprofile = FOREACH gp_userprofile GENERATE $0,COUNT($1); dump count_userprofile; ``` (all,14) ``` > userprofile = LOAD '/dataset/999004/999004.tsv' USING PigStorage('\t') AS (userid: chararray, gender:chararray, age: int, country:chararray, registered: chararray); filter_userprofile = FILTER userprofile BY $2 > 64; gp_userprofile = GROUP filter_userprofile ALL; count_userprofile = FOREACH gp_userprofile GENERATE $0,COUNT($1); dump count_userprofile; ``` (all,5) Ans: (0-19,44) (20-39,256) (40-64,14) (65--,5) ``` ## 999005 - 使用者點聽紀錄 > userdemand = LOAD '/dataset/999005/999005.tsv' USING PigStorage('\t') AS (userid: chararray, time:chararray, artname: chararray, traname:chararray); ``` 欄位名稱 使用者id userid chararray 點聽時間 time chararray 歌手名稱 artname chararray 歌曲名稱 traname chararray ``` ### 那一天是會員透過我們的音樂網站聆聽音樂最多的一天 ? > userdemand = LOAD '/dataset/999005/999005.tsv' USING PigStorage('\t') AS (userid: chararray, time:chararray, artname: chararray, traname:chararray); sub_userdemand = FOREACH userdemand GENERATE SUBSTRING($1,0,10),$2,$3; gp_userdemand = GROUP sub_userdemand BY $0; count_userdemand = FOREACH gp_userdemand GENERATE $0,COUNT($1); asc_userdemand = ORDER count_userdemand BY $1 ASC; dump asc_userdemand; ``` (2008-09-24,250) ``` ## 999004 跟 999005 均會用到 ### 男性與女性的會員數量與總點播次數 > userdemand = LOAD '/dataset/999005/999005.tsv' USING PigStorage('\t') AS (userid: chararray, time:chararray, artname: chararray, traname:chararray); userprofile = LOAD '/dataset/999004/999004.tsv' USING PigStorage('\t') AS (userid: chararray, gender:chararray, age: int, country:chararray, registered: chararray); join1 = JOIN userdemand BY $0, userprofile BY $0; gp_userprofile = GROUP join1 BY $5; count_userprofile = FOREACH gp_userprofile GENERATE $0, COUNT($1); dump count_userprofile; ``` (f,52636) (m,102146) ``` > userprofile = LOAD '/dataset/999004/999004.tsv' USING PigStorage('\t') AS (userid: chararray, gender:chararray, age: int, country:chararray, registered: chararray); gp_userprofile = GROUP userprofile BY $1; count_userprofile = FOREACH gp_userprofile GENERATE $0, COUNT($1); dump count_userprofile; ``` (f,137) (m,182) Ans: (f,137,52636) (m,182,102146) ``` ### 會員數量排行顯示前五名的國家,和這些國家的總點播次數 > userdemand = LOAD '/dataset/999005/999005.tsv' USING PigStorage('\t') AS (userid: chararray, time:chararray, artname: chararray, traname:chararray); userprofile = LOAD '/dataset/999004/999004.tsv' USING PigStorage('\t') AS (userid: chararray, gender:chararray, age: int, country:chararray, registered: chararray); join1 = JOIN userdemand BY $0, userprofile BY $0; gp_userprofile1 = GROUP join1 BY $7; count_userprofile1 = FOREACH gp_userprofile1 GENERATE $0, COUNT($1); dump count_userprofile1; ``` (Canada,37110) (France,1618) (Greece,192) (Latvia,727) (Mexico,2991) (Norway,3649) (Poland,10664) (Serbia,994) (Sweden,3147) (Turkey,10707) (Armenia,584) (Belgium,2967) (Croatia,1237) (Estonia,339) (Finland,8409) (Germany,14558) (Hungary,1572) (Ireland,1480) (Morocco,587) (Romania,1265) (Bulgaria,364) (Portugal,1454) (Slovakia,1689) (Thailand,372) (Argentina,3096) (Australia,1760) (Nicaragua,44) (Venezuela,950) (Netherlands,468) (New Zealand,340) (Switzerland,1769) (United States,9255) (Czech Republic,1673) (United Kingdom,2092) (Russian Federation,2348) (Trinidad and Tobago,3223) (Canada Minor Outlying Islands,1089) (British Indian Ocean Territory,649) ``` > userprofile = LOAD '/dataset/999004/999004.tsv' USING PigStorage('\t') AS (userid: chararray, gender:chararray, age: int, country:chararray, registered: chararray); gp_userprofile2 = GROUP userprofile BY $3; count_userprofile2 = FOREACH gp_userprofile2 GENERATE $0, COUNT($1); asc_userprofile2 = ORDER count_userprofile2 BY $1; dump asc_userprofile2; ``` (Canada,60) (Germany,35) (China,34) (Taiwan,20) (Poland,19) (Turkey,14) (United States,12) (Finland,10) ``` > join12 = JOIN count_userprofile1 BY $0,count_userprofile2 BY $0; foreach_join12 = FOREACH join12 GENERATE $0,$3,$1; asc_join12 = ORDER foreach_join12 BY $1 ASC; dump asc_join12; ``` (United States,12,9255) (Turkey,14,10707) (Poland,19,10664) (Germany,35,14558) (Canada,60,37110) ```