# Nickname Predict introspection
先前實作 Nickname Predict 時,推測 accuracy 下降的原因是斷詞的多樣性不足。因此這裡逐一細部檢查 model 判斷錯誤的原因類型,並確認這些錯誤是否與斷詞有關。
## 綜合原因
1. 大部分漏判涉政(1)的資料 model 皆判斷為正常(0)
2. 「金猴爷」會被斷詞切開成「金」與「猴爷」,其中「金」被判定為涉政(1)
[TOC]
## model 漏判 正常(0) 原因
### 人名 被判定為涉政(1)
```python=
[('黄智义', 1)]
[('黄小华', 1)]
[('张文清', 1)]
[('陈密瓜', 1)]
[('陈小小', 1)]
[('张芯', 1)]
[('林王义', 1)]
[('吴金财', 1)]
[('苏国亮', 1)]
[('陈建廷', 1)]
[('陈万', 1)]
[('杜江', 1)]
[('陈玮', 1)]
[('陈维中', 1)]
[('潘信良', 1)]
[('张小玮', 1)]
[('陈国', 1)]
[('罗洪', 1)]
[('王俊闵', 1)]
[('程建智', 1)]
[('沈赟', 1)]
[('王强', 1)]
[('王买军', 1)]
[('施正吉', 1)]
[('韩政', 1)]
```
### 姓氏被斷詞切開,導致姓氏單獨被判定為涉政(1)(非全模式)
```python=
[('刘', 1), ('志成', 0)]
[('王', 1)]
[('黎', 1), ('俊杰', 0)]
[('陈毅', 1), ('芳', 0)]
[('王', 1), ('建文', 0)]
[('唐', 1), ('沢', 0), ('贵', 0), ('洋', 0)]
```
### 姓氏被斷詞切開,導致姓氏單獨被判定為涉政(1)(全模式)
```python=
[('王', 1), ('宥', 0), ('翔', 0)]
[('李', 1), ('杰', 0), ('杰', 0)]
[('王', 1), ('如', 0)]
[('#', 0), ('鼠', 0), ('王', 1), ('*', 0), ('%', 0), ('>', 0), ('_', 0), ('<¥', 0)]
[('林', 0), ('王', 1), ('义', 0)]
[('刘', 1), ('志成', 0)]
[('吴', 1), ('金', 1), ('财', 0)]
[('胡', 1), ('楠', 0)]
[('刘', 1), ('迪', 0), ('迪', 0)]
[('吴', 1), ('秋', 0), ('云', 0)]
[('王', 1)]
[('王', 1), ('慈', 0), ('岚', 0)]
[('王', 1), ('俊', 0), ('闵', 0)]
[('王', 1), ('建文', 0)]
[('李', 1), ('承', 0), ('骏', 0)]
[('蒋', 1), ('君', 0), ('佩', 0)]
[('王', 1), ('买', 5), ('军', 0)]
[('唐', 1), ('沢', 0), ('贵', 0), ('洋', 0)]
[('蔡', 1), ('雅', 0), ('卿', 0)]
[('唐', 1), ('洲', 0)]
[('王', 1), ('瑜', 0), ('情', 0)]
[('王', 1), ('小', 0), ('岳', 0)]
```
### 暱稱中含有地名,導致被判定為涉政(1)
```python=
[('日本', 1), ('男', 0), ('児', 0), ('桃太郎', 0)]
[('施耐', 0), ('徳', 0), ('电气', 0), ('山西', 1), ('分销商', 0)]
[('东北人', 1), ('@', 0), ('文龙', 0)]
[('大溪地', 1), ('的', 0), ('一条', 0), ('鱼', 0)]
[('\ue52b', 0), ('M', 0), ('国', 1), ('K', 0), ('珍', 0), ('T', 0), ('\ue52b', 0)]
[('北京', 1), ('一夜', 0)]
```
### 我們與數美相同 但 Correct 答案為正常
```python=
[('毛', 1), ('爷爷', 0), ('说', 0), ('这个', 0), ('男人', 0), ('很', 0), ('革命', 1)]
[('周斌', 1)]
[('朱德', 1), ('鑫', 1)]
[('刘正恩', 1)]
[('李鹏', 1), ('举', 0)]
[('李月', 1), ('月', 0)]
[('鑫', 1), ('标榜', 0), ('首席', 0), ('总监', 0), ('二师', 0)]
[('邓文贵', 1)]
[('太上皇', 1)]
[('张毅', 1)]
[('陈文贵', 1)]
[('朱德', 1), ('权', 0)]
[('秦涛', 1)]
[('反清复明', 1)]
[('李文贵', 1)]
[('王斌', 1)]
[('邪', 0), ('叫', 0), ('教主不要', 0), ('与', 0), ('邪教', 1), ('苟同', 0)]
[('王仁村', 1)]
[('王仁', 1)]
[('潍坊', 0), ('泥浆', 0), ('土', 0), ('刘淇', 1)]
[('城管', 1)]
[('A', 0), ('.', 0), ('天地家', 0), ('装饰', 0), ('客户经理', 0), ('刘', 1), ('泽东', 1)]
[('光影', 0), ('先', 0), ('生', 0), ('📷', 0), ('克强', 1)]
[('谷', 0), ('克强', 1)]
[('中国', 1), ('梦', 0)]
[('阿库', 0), ('喇嘛', 1), ('塔塔', 0)]
[('高莹', 1)]
[('卓文贵', 1)]
[('王策', 1)]
[('张宏', 1)]
[('包子', 1), ('哥', 0)]
[('张小雷', 1)]
[('张翔', 1)]
[('温', 0), ('克强', 1)]
[('L3', 0), ('朱德', 1), ('鑫', 1), ('℡', 0), ('¹', 0), ('⁷', 0), ('⁶', 0), ('₂', 0), ('₀', 0), ('₃', 0), ('₈', 0), ('⁷', 0), ('º', 0), ('⁴', 0), ('²', 0)]
[('陈刚', 1)]
[('妖言惑众', 1)]
[('宋文贵', 1)]
[('AA', 0), ('汉唐', 1), ('营销', 0), ('经', 0), ('理', 0), ('李鹏', 1), ('举', 0)]
[('李东辉', 1)]
[('草井', 2), ('満', 0), ('子', 0)]
[('彩金', 3)]
[('屁股', 4), ('隆', 0), ('拉西', 0)]
[('幼幼', 4)]
[('欲望', 4)]
[('挡不住的风情', 4)]
[('肉棍', 4), ('仔', 0)]
[('夜袭', 0), ('寡妇村', 4)]
[('欲望', 4), ('\n', 0)]
[('肏', 4), ('捏', 0), ('莽', 0)]
[('朝天', 0), ('呐喊', 0), ('_', 0), ('肏', 4)]
[('批水多', 0), ('口活', 4), ('好', 0)]
[('丁字裤', 4), ('裤', 0)]
[('十八摸', 4), ('丶', 0)]
```
## model 漏判 涉政(1) 原因
### 我們與數美相同 但 Correct 答案為涉政(1)
```python=
[('萧捷', 0)]
[('陈秀卿', 0)]
[('秦世杰', 4)]
# [('泰迪', 0)] # ??
# [('人心', 0), ('叵测', 0), ('人心叵测', 0), ('我', 0), ('保护', 0), ('你', 0), ('@', 0), ('叶', 0)]
# [('简单', 0), ('点', 0)] # ??
[('魂神', 0)]
[('Y', 0), ('li', 0), ('uli', 0), ('-', 0), ('杨淑', 0), ('琴', 0), ('\ue523', 0)]
# [('瘦肉', 0), ('瘦肉精', 0)] 數美未跟上時事
[('林建荣', 0)]
[('杨添福', 0)]
[('涛哥', 0)]
[('李荣奇', 0)]
[('杨彬', 0)]
[('未亡', 0), ('未亡人', 3)]
```
### model 未識別 相關政治人物(數美判斷正確)
```python=
[('俺', 0), ('~', 0), ('习总', 0), ('小舅', 0), ('舅子', 0), ('小舅子', 0)]
[('习老', 0), ('总', 0)]
[('顾三胖', 0), ('(', 0), ('寄卖', 5), ('行', 0), ('、', 0), ('汽车', 0), ('租赁', 0), (')', 0)]
[('文强', 0)]
[('戴耀廷', 0)]
[('请', 0), ('我', 0), ('叫', 0), ('雷锋', 0)]
[('习平', 0)]
[('胡佳', 0)]
[('石涛', 0)]
[('朱丹', 0)]
[('黄琦', 0)]
[('王晓东', 0)]
[('邓晓萍', 0)]
[('雷锋', 0), ('叔叔', 0)]
[('刘琪', 0)]
[('茎', 0), ('正恩', 0)]
[('郝', 0), ('金平', 0)]
[('金平', 0)]
[('家族', 0), ('金氏家族', 0)]
[('🇨', 0), ('🇳', 0), ('雷锋', 0), ('🇨', 0), ('🇳', 0)]
[('雷疯', 0), ('姐姐', 0)]
[('王珉', 0)]
[('魏哲', 0)]
[('杨晶', 0)]
```
## model 漏判 辱罵(2) 原因
### model 沒有學習到 辱罵(2) 字眼
```python=
[('A', 0), ('.', 0), ('💋', 0), ('渣', 0), ('男', 0)]
[('你', 0), ('野爹', 0)]
[('我', 0), ('去', 0), ('🐂', 0), ('屎', 0)]
[('干破', 0), ('你', 0), ('娘', 0), ('老', 0), ('鸡8', 0)]
[('我', 0), ('敲', 0), ('你', 0), ('吗', 0)]
[('沙雕', 0), ('网友', 0), ('呀', 0), ('i', 0)]
[('游戏', 0), ('公司', 0), ('丝全家', 0)]
[('妈哩个', 0), ('b', 0)]
[('风滚草', 0)]
[('渣', 0), ('男', 0)]
[('🦅', 0), ('t', 0), ('m', 0), ('铁蹄', 0), ('踏流', 0), ('年', 0)]
[('沙雕', 0), ('网友', 0)]
[('大意', 0), ('了', 0), ('还', 0), ('以', 0), ('为', 0), ('是', 0), ('个', 0), ('屁', 0)]
[('狗', 0), ('逼', 0), ('不', 0), ('给', 0), ('分', 0)]
[('不要脸', 0)]
[('滚', 0), ('你', 0), ('二爷', 0)]
[('【', 0), ('黑鬼', 0), ('.', 0), ('】', 0), ('🐉', 0)]
[('小明', 0), ('他', 0), ('妈装', 0), ('B', 0)]
[('小混蛋', 0)]
[('人贱', 0)]
[('你', 0), ('爹', 0)]
[('晓贱', 0)]
[('烂', 0), ('人', 0)]
[('渣', 0), ('男', 0), ('锡纸', 0), ('烫', 0)]
[('A', 0), ('.', 0), ('渣', 0), ('男', 0)]
[('是', 0), ('狗', 0), ('吧', 0), ('.', 0)]
[('辉辉', 0), ('skr', 0), ('沙雕', 0)]
[('万贱', 0)]
[('黑鬼', 0), ('👻', 0)]
[('为', 0), ('了', 0), ('你', 0), ('吗', 0), ('比', 0)]
[('sb', 0), ('太', 0), ('多', 0)]
[('破产麻痺', 0)]
[('我妈', 0), ('的', 0), ('儿子', 0), ('真帅', 0), ('i', 0)]
[('狗腿子', 0)]
[('斗', 0), ('TM', 0), ('扯淡', 0)]
[('妈蛋', 0)]
[('黑鬼', 0)]
[('💖', 0), ('我', 0), ('就', 0), ('是', 0), ('渣', 0), ('男', 0), ('啊', 0)]
[('妖', 0), ('也', 0), ('是', 0), ('妖', 0), ('他', 0), ('妈生', 0), ('的', 0)]
[('我', 0), ('可以', 0), ('跪', 0), ('着', 0), ('求', 0), ('你', 0), ('也', 0), ('可以', 0), ('笑', 0), ('着', 0), ('弄死', 0), ('你', 0)]
[('要', 0), ('滚', 0), ('、', 0), ('就', 0), ('滚', 0), ('远点', 0)]
[('拿', 0), ('你', 0), ('当命', 0), ('是', 0), ('我', 0), ('有', 0), ('病', 0)]
[('赢', 0), ('了', 0), ('笑嘻嘻', 0), ('、', 0), ('输', 0), ('了', 0), ('MMP', 0)]
[('玩', 0), ('你', 0), ('嬷', 0), ('个', 0), ('丑', 0), ('逼', 0)]
[('一位', 0), ('不', 0), ('知名', 0), ('的', 0), ('沙雕', 0), ('艺术', 0), ('家', 0)]
[('我', 0), ('TM', 0), ('是', 0), ('阿', 0), ('宝哥', 0)]
[('有', 0), ('病', 0)]
[('狗', 0), ('逼', 0), ('平', 0), ('台', 0)]
[('我', 0), ('是', 0), ('你', 0), ('祖宗', 0), ('😏', 0)]
[('是', 0), ('敌', 0), ('是', 0), ('友别', 0), ('是', 0), ('狗', 0), ('@', 0), ('!', 0)]
[('小嘴儿', 0), ('别太贱', 0)]
[('起个', 0), ('B', 0), ('名想', 0), ('半天', 0)]
[('自己', 0), ('心里', 0), ('没点', 0), ('B', 0), ('数', 0), ('吗', 0)]
[('傻帽', 0), ('^', 0), ('_', 0), ('^', 0)]
[('沙雕', 0), ('🙄', 0)]
[('滚', 0), ('犊子', 0)]
[('艸', 0), ('芔', 0), ('茻', 0)]
[('你', 0), ('奶奶', 0), ('的', 0), ('熊', 0)]
[('我', 0), ('是', 0), ('猪', 0)]
[('操海斗', 0)]
[('艸', 0)]
[('同', 0), ('你', 0), ('死', 0), ('过', 0)]
[('彻头彻尾', 0), ('🔛', 0), ('渣', 0), ('男', 0)]
[('憨批', 0)]
[('狗', 0), ('比人忠', 0), ('🙏', 0)]
[('穷', 0), ('逼', 0)]
[('妈', 0), ('了', 0), ('个', 0), ('比', 0)]
[('再输', 0), ('曰', 0), ('你', 0), ('妈', 0)]
[('容偲盛', 0), ('是', 0), ('渣', 0), ('男', 0), ('_', 0)]
[('忘记', 0), ('TM', 0), ('好难', 0)]
[('草腻', 0), ('马', 0)]
[('丢雷楼', 0), ('某', 0)]
[('沙雕', 0), ('游戏', 0)]
[('啥', 0), ('比游西', 0), ('玩', 0), ('你', 0), ('妈', 0), ('删', 0), ('了', 0)]
[('艸', 0), ('艸', 0), ('艸', 0)]
[('SY', 0), ('加油', 0), ('💪', 0), ('你', 0), ('是', 0), ('最', 0), ('TM', 0), ('没用', 0), ('的', 0), ('👍', 0), ('🏿', 0)]
[('A', 0), ('妈', 0), ('的', 0), ('时', 0), ('间', 0), ('别跑', 0)]
[('(', 0), ('*', 0), ('´', 0), ('艸', 0), ('`', 0), ('*', 0), (')', 0)]
[('不要脸', 0)]
[('死妈', 0), ('的', 0), ('彩票', 0)]
[('你', 0), ('是', 0), ('狗', 0), ('吧', 0)]
[('渣', 0), ('男', 0), ('不', 0), ('聊天', 0)]
[('别来', 0), ('装傻', 0), ('逼', 0)]
[('我', 0), ('满怀希望', 0), ('的', 0), ('有', 0), ('病', 0), ('信仰', 0)]
[('不摇碧莲', 0)]
[('渣', 0), ('男', 0), ('🖤', 0)]
[('@', 0), ('沙雕', 0), ('芝麻官', 0), ('-', 0)]
[('沙雕', 0), ('歪果', 0), ('佬', 0)]
[('你', 0), ('个', 0), ('🐶', 0), ('批', 0)]
[('你', 0), ('妈', 0), ('啦', 0), ('逼', 0)]
[('傻', 0), ('X', 0), ('破', 0), ('给', 0), ('你', 0), ('您', 0)]
[('狗托', 0)]
[('喝多', 0), ('的', 0), ('渣', 0), ('男', 0)]
[('都', 0), ('是', 0), ('傻', 0), ('X', 0)]
[('小混蛋', 0), ('💋', 0)]
[('臭', 0), ('不要脸', 0), ('的', 0), ('白', 0), ('胖子', 0), ('!', 0)]
[('可爱的', 0), ('林死', 0), ('狗', 0)]
[('小鸡', 0), ('巴', 0)]
[('你', 0), ('算', 0), ('什么', 0), ('东', 0), ('西', 0)]
[('坟头', 0), ('蹦迪', 0), ('调戏', 0), ('鬼', 0)]
```
### model 斷詞後被偵測到 涉政字眼
```python=
[('叼', 0), ('毛', 1)]
[('不起', 0), ('玩不起', 0), ('就', 0), ('不要', 0), ('玩', 0), ('!', 0), ('台湾', 1), ('垃圾', 2)]
[('王', 1), ('尼玛', 2)]
[('张慧', 1), ('小', 0), ('傻逼', 2)]
[('艹', 2), ('爵战', 1), ('`', 0), ('真', 0), ('我', 0), ('风采', 0)]
[('金', 1), ('猴爷', 0), ('死妈', 0)]
[('王实', 1), ('操', 2)]
[('舔', 1), ('狗', 0), ('🐶', 0)]
[('SB', 0), ('金', 1), ('猴爷', 0)]
[('师傅', 0), ('干死', 2), ('金', 1), ('猴爷', 0)]
[('草泥马', 2), ('金', 1), ('猴爷', 0)]
[('金', 1), ('猴爷', 0), ('全家', 4), ('死全家', 2)]
```
### model 誤判為色情(4)
```python=
[('卧槽', 4)]
[('老', 0), ('鸡巴', 4)]
[('别', 0), ('在', 0), ('我', 0), ('面前', 0), ('犯贱', 4)]
[('哎哟', 0), ('卧槽', 4)]
[('游戏', 0), ('公司', 0), ('全家', 4), ('丝全家', 0)]
[('常常', 0), ('因为', 0), ('自己', 0), ('不够', 0), ('傻', 0), ('屌', 4), ('而', 0), ('自卑', 0), ('🙈', 0)]
[('小逼', 4), ('.', 0)]
[('天天', 0), ('贱', 4)]
[('殷操', 4)]
[('꧁', 0), ('卧槽', 4), ('!', 0), ('꧂', 0)]
[('@', 0), ('卧槽', 4), ('天', 0), ('使', 0), ('👼', 0), ('@', 0)]
[('小', 0), ('骚货', 4)]
[('卧槽', 4), ('无情', 0)]
[('傻', 0), ('屌', 4), ('侠', 0)]
[('人', 0), ('生就', 0), ('俩', 0), ('字', 0), ('卧槽', 4)]
[('真', 0), ('特么', 4), ('纯', 0), ('-', 0)]
[('妳', 0), ('娘', 0), ('老', 0), ('鸡掰', 4)]
[('犯贱', 4)]
[('贱', 4), ('朕', 1), ('骑', 0), ('鸡', 4)]
[('哎', 0), ('呦', 0), ('卧槽', 4)]
[('喜', 0), ('逼逼', 4)]
[('💓', 0), ('你', 0), ('安好', 0), ('若安好', 0), ('哎', 0), ('呦', 0), ('卧槽', 4)]
[('我', 0), ('TM', 0), ('就', 0), ('是', 0), ('犯贱', 4)]
[('屌', 4), ('你', 0), ('婆婆', 0)]
[('屌', 4), ('你', 0), ('不', 0), ('死', 0)]
[('你', 0), ('全家', 4), ('都', 0), ('母猪', 0)]
```
## model 漏判 違禁(3) 原因
### model 沒有學習到 違禁(3) 字眼
```python=
[('菩提', 0), ('心', 0)]
[('浮屠', 0)]
[('连长', 0)]
[('匪帮', 0)]
[('黑道', 0), ('奶爸', 0)]
[('虚伪', 0), ('是', 0), ('微笑', 0), ('背后', 0), ('的', 0), ('势力', 0), ('黑势力', 0)]
[('释迦', 0)]
[('沙漠', 0), ('之鹰', 0)]
[('人似', 0), ('砒霜', 0)]
[('情如', 0), ('含笑', 0), ('饮', 0), ('砒霜', 0)]
[('心', 0), ('灵', 0), ('砒霜', 0)]
[('猎枪', 0)]
[('深爱', 0), ('是', 0), ('含笑', 0), ('饮', 0), ('砒霜', 0)]
[('纯', 0), ('K', 0), ('粉', 0)]
[('铊', 0)]
[('巴雷特', 0)]
[('深爱', 0), ('是', 0), ('含笑', 0), ('饮', 0), ('砒霜', 0), ('°', 0)]
[('闪光', 0), ('闪光弹', 0)]
[('含笑', 0), ('饮', 0), ('砒霜', 0)]
[('掌心', 0), ('掌心雷', 0)]
```
### model 誤判為涉政(1) 的字眼
```python=
[('弥陀', 0), ('阿弥陀', 1), ('阿弥陀佛', 1)]
[('福尔', 0), ('马林', 1), ('福尔马林', 0)]
[('A', 0), ('🇨', 0), ('🇳', 0), ('佛陀', 1), ('座下', 0), ('弟子', 0), ('🇨', 0), ('🇳', 0)]
[('穆罕', 0), ('默德', 0), ('穆罕默', 0), ('穆罕默德', 1), ('🃏', 0), ('土锤', 0)]
[('佛陀', 1)]
[('黑手', 0), ('黑手党', 1)]
[('穆罕', 0), ('默德', 0), ('穆罕默', 0), ('穆罕默德', 1), ('su', 0)]
[('霸权', 0), ('主义', 0), ('霸权主义', 1)]
[('张小雷', 1)]
[('为', 0), ('长者', 1), ('续', 0), ('一', 0), ('秒', 0), ('จ', 0), ('ุ', 0), ('๊', 0), ('บ', 0)]
[('黑帮', 1), ('V', 0), ('册', 0), ('老', 0), ('♌', 0)]
[('喇嘛', 1), ('达', 0)]
[('九', 0), ('喇嘛', 1)]
[('军警', 1)]
[('毒药', 3), ('国际', 1)]
```