# RFM 學習筆記
## 我們一開始是使用資料清洗
:::success
有鑑於壓力太大
來學學 ML 相關
:::
:::spoiler
``` python=
#@title api code
# This cell is hidden when the notebook is loaded.
# Requires the Kaggle CLI: !pip install kaggle
#
# SECURITY: never paste a real Kaggle API key into notes or notebooks.
# The key that used to be hard-coded here has been published and should be
# revoked in the Kaggle account settings.
#
# The credentials are the "username" and "key" fields of the kaggle.json file
# obtained when an API token is created; they are read from the environment so
# the secret stays out of the document.
import os

api_token = {
    "username": os.environ.get("KAGGLE_USERNAME", "<your-kaggle-username>"),
    "key": os.environ.get("KAGGLE_KEY", "<your-kaggle-api-key>"),
}
```
:::
``` python=
import json
import zipfile
import os
if not os.path.exists("/root/.kaggle"):
os.makedirs("/root/.kaggle")
#這邊是直接更改 API
with open('/root/.kaggle/kaggle.json', 'w') as file:
json.dump(api_token, file)
!chmod 600 /root/.kaggle/kaggle.json
if not os.path.exists("/kaggle"):
os.makedirs("/kaggle")
os.chdir('/kaggle')
# api name !留著其他取代
!kaggle competitions download -c iamthebestcoderopen2020
!ls /kaggle
!mkdir -p data
# path you can do
folder_path ="/kaggle"
for i in range (len(os.listdir(folder_path))):
a = os.listdir(folder_path)
try :
!unzip -uq {a[i]} -d "/kaggle/data"
except:
pass
```
``` python=
# Horizontal bar chart of the fraction of missing (null) values per column.
# NOTE(review): assumes `data` is the raw pandas DataFrame loaded earlier - confirm.
plt.figure(figsize=(5, 5))
null_ratio = data.isna().mean(axis=0)  # per column: share of entries that are null
null_ratio.plot.barh()
plt.title("Ratio of missing values per columns")
```
找出可能有空值的地方

再來我們把資料轉換成 RFM
``` python=
# Reference date for the recency computation. Per the original note it is
# fixed to one day after the last entry in the database.
NOW = dt.datetime(year=2011, month=12, day=10)
# Parse the invoice timestamps so they can be subtracted from NOW.
df_cleaned['InvoiceDate'] = pd.to_datetime(df_cleaned['InvoiceDate'])
```
``` python=
# Column -> aggregation mapping handed to `.agg()`:
# the first value in each group for the date and customer columns, and the
# sum of the prices.
custom_aggregation = {
    "InvoiceDate": lambda rows: rows.iloc[0],
    "CustomerID": lambda rows: rows.iloc[0],
    "TotalPrice": "sum",
}
```
``` python=
# Collapse the cleaned rows to one row per invoice using the
# `custom_aggregation` mapping.
rfmTable = df_cleaned.groupby("InvoiceNo").agg(custom_aggregation)
# Recency = whole days between the invoice date and the reference date NOW.
# `.dt.days` replaces `astype("timedelta64[D]")`, which pandas 2.x rejects
# (only s/ms/us/ns resolutions are supported). The result holds integer day
# counts, whereas older pandas returned floats from the astype call.
rfmTable["Recency"] = (NOW - rfmTable["InvoiceDate"]).dt.days
rfmTable.head(5)
```
``` python=
# Build a per-customer RFM table from `orders`, score each metric with the
# RClass / FMClass helpers against the quartile cut points, and write the
# result to `outputfile` as CSV.
rfm_aggregations = {
    'order_date': lambda dates: (NOW - dates.max()).days,  # Recency
    'order_id': lambda ids: len(ids),                      # Frequency
    'grand_total': lambda totals: totals.sum(),            # Monetary Value
}
rfmTable = orders.groupby('customer').agg(rfm_aggregations)
rfmTable['order_date'] = rfmTable['order_date'].astype(int)
rfmTable.rename(
    columns={
        'order_date': 'recency',
        'order_id': 'frequency',
        'grand_total': 'monetary_value',
    },
    inplace=True,
)

# 25% / 50% / 75% cut points per column, as {column: {quantile: value}}.
quantiles = rfmTable.quantile(q=[0.25, 0.5, 0.75]).to_dict()

# NOTE: this is an alias, not a copy - the score columns added below also
# show up on rfmTable.
rfmSegmentation = rfmTable
for metric, score_column, classify in (
    ('recency', 'R_Quartile', RClass),
    ('frequency', 'F_Quartile', FMClass),
    ('monetary_value', 'M_Quartile', FMClass),
):
    rfmSegmentation[score_column] = rfmSegmentation[metric].apply(
        classify, args=(metric, quantiles)
    )

# Concatenate the three scores into one string label per customer.
rfmSegmentation['RFMClass'] = (
    rfmSegmentation['R_Quartile'].map(str)
    + rfmSegmentation['F_Quartile'].map(str)
    + rfmSegmentation['M_Quartile'].map(str)
)
rfmSegmentation.to_csv(outputfile, sep=',')
```