# -*- coding: utf-8 -*-
"""
Created on Sun Jul 5 22:33:01 2020
@author: jchang7
"""
import pandas as pd
import numpy as np
import os
import datetime
######################################## Get Data
churn = pd.read_csv('Churn Data.csv', index_col=[0,1], skipinitialspace=True)
# drop duplicate uuid with multiple city
churn = churn.drop_duplicates(subset = ["store_name","fo_date","cuisine","longitude"])
# count the cuisine tags
tmp = churn.groupby(level=[0,1]).size()
churn = churn.drop_duplicates(subset = ["store_name","fo_date","longitude"])
churn = pd.concat([churn,tmp], axis = 1, sort = False)
churn = churn.rename(columns={0: "cuisineN"})
tmp = pd.read_csv('Churn expo.csv', index_col=[0,1], skipinitialspace=True)
churn = pd.concat([churn,tmp],axis=1, join="inner")
active = pd.read_csv('Active Data1.csv', index_col=[0,1], skipinitialspace=True,
low_memory=False).append(pd.read_csv('Active Data2.csv',
index_col=[0,1], skipinitialspace=True,low_memory=False))
# drop duplicate uuid with multiple city
active = active.drop_duplicates(subset = ["store_name","fo_date","cuisine","longitude"])
# count the cuisine tags
tmp = active.groupby(level=[0,1]).size()
active = active.drop_duplicates(subset = ["store_name","fo_date","longitude"])
active = pd.concat([active,tmp], axis = 1, sort = False)
active = active.rename(columns={0: "cuisineN"})
tmp = pd.read_csv('Active expo.csv', index_col=[0,1], skipinitialspace=True)
active = pd.concat([active,tmp],axis=1, join="inner")
# make \N no exist and make column type from string to float
churn = churn.replace("\\N",np.nan) ; active = active.replace("\\N",np.nan)
churn.to_csv('churn_tmp.csv') ; active.to_csv('active_tmp.csv')
churn = pd.read_csv('churn_tmp.csv' , index_col=[0,1], skipinitialspace=True)
active = pd.read_csv('active_tmp.csv', index_col=[0,1], skipinitialspace=True)
os.remove("churn_tmp.csv") ; os.remove("active_tmp.csv")
######################################## Tidy up data
churn ['fo_date'] = pd.to_datetime(churn ['fo_date'], format='%Y-%m-%d')
churn ['FA_date'] = pd.to_datetime(churn ['FA_date'], format='%Y-%m-%d')
churn ['ft_date'] = pd.to_datetime(churn ['ft_date'], format='%Y-%m-%d')
churn ['lo_date'] = pd.to_datetime(churn ['lo_date'], format='%Y-%m-%d')
churn ['lt_date'] = pd.to_datetime(churn ['lt_date'], format='%Y-%m-%d')
active['fo_date'] = pd.to_datetime(active['fo_date'], format='%Y-%m-%d')
active['FA_date'] = pd.to_datetime(active['FA_date'], format='%Y-%m-%d')
active['ft_date'] = pd.to_datetime(active['ft_date'], format='%Y-%m-%d')
active['lo_date'] = pd.to_datetime(active['lo_date'], format='%Y-%m-%d')
active['lt_date'] = pd.to_datetime(active['lt_date'], format='%Y-%m-%d')
churn ['open_FO30d'] = churn ['open_FO30d']/churn ['menu_FO30d']
churn ['open_FT30d'] = churn ['open_FT30d']/churn ['menu_FT30d']
churn ['open_LO30d'] = churn ['open_LO30d']/churn ['menu_LO30d']
churn ['open_LT30d'] = churn ['open_LT30d']/churn ['menu_LT30d']
active['open_FO30d'] = active['open_FO30d']/active['menu_FO30d']
active['open_FT30d'] = active['open_FT30d']/active['menu_FT30d']
active['open_LO30d'] = active['open_LO30d']/active['menu_LO30d']
active['open_LT30d'] = active['open_LT30d']/active['menu_LT30d']
churn ['expo_FO30d'] = churn ['expo_front_FO30d'] + churn ['expo_carou_FO30d'] + churn ['expo_search_FO30d']
churn ['expo_FT30d'] = churn ['expo_front_FT30d'] + churn ['expo_carou_FT30d'] + churn ['expo_search_FT30d']
churn ['expo_LO30d'] = churn ['expo_front_LO30d'] + churn ['expo_carou_LO30d'] + churn ['expo_search_LO30d']
churn ['expo_LT30d'] = churn ['expo_front_LT30d'] + churn ['expo_carou_LT30d'] + churn ['expo_search_LT30d']
active['expo_FO30d'] = active['expo_front_FO30d'] + active['expo_carou_FO30d'] + active['expo_search_FO30d']
active['expo_FT30d'] = active['expo_front_FT30d'] + active['expo_carou_FT30d'] + active['expo_search_FT30d']
active['expo_LO30d'] = active['expo_front_LO30d'] + active['expo_carou_LO30d'] + active['expo_search_LO30d']
active['expo_LT30d'] = active['expo_front_LT30d'] + active['expo_carou_LT30d'] + active['expo_search_LT30d']
churn ['taps_FO30d'] = churn ['taps_front_FO30d'] + churn ['taps_carou_FO30d'] + churn ['taps_search_FO30d']
churn ['taps_FT30d'] = churn ['taps_front_FT30d'] + churn ['taps_carou_FT30d'] + churn ['taps_search_FT30d']
churn ['taps_LO30d'] = churn ['taps_front_LO30d'] + churn ['taps_carou_LO30d'] + churn ['taps_search_LO30d']
churn ['taps_LT30d'] = churn ['taps_front_LT30d'] + churn ['taps_carou_LT30d'] + churn ['taps_search_LT30d']
active['taps_FO30d'] = active['taps_front_FO30d'] + active['taps_carou_FO30d'] + active['taps_search_FO30d']
active['taps_FT30d'] = active['taps_front_FT30d'] + active['taps_carou_FT30d'] + active['taps_search_FT30d']
active['taps_LO30d'] = active['taps_front_LO30d'] + active['taps_carou_LO30d'] + active['taps_search_LO30d']
active['taps_LT30d'] = active['taps_front_LT30d'] + active['taps_carou_LT30d'] + active['taps_search_LT30d']
churn ['taps_front_FO30d' ] = churn ['taps_front_FO30d' ]/churn ['expo_front_FO30d' ]
churn ['taps_carou_FO30d' ] = churn ['taps_carou_FO30d' ]/churn ['expo_carou_FO30d' ]
churn ['taps_search_FO30d'] = churn ['taps_search_FO30d']/churn ['expo_search_FO30d']
active['taps_front_FO30d' ] = active['taps_front_FO30d' ]/active['expo_front_FO30d' ]
active['taps_carou_FO30d' ] = active['taps_carou_FO30d' ]/active['expo_carou_FO30d' ]
active['taps_search_FO30d'] = active['taps_search_FO30d']/active['expo_search_FO30d']
churn ['taps_front_FT30d' ] = churn ['taps_front_FT30d' ]/churn ['expo_front_FT30d' ]
churn ['taps_carou_FT30d' ] = churn ['taps_carou_FT30d' ]/churn ['expo_carou_FT30d' ]
churn ['taps_search_FT30d'] = churn ['taps_search_FT30d']/churn ['expo_search_FT30d']
active['taps_front_FT30d' ] = active['taps_front_FT30d' ]/active['expo_front_FT30d' ]
active['taps_carou_FT30d' ] = active['taps_carou_FT30d' ]/active['expo_carou_FT30d' ]
active['taps_search_FT30d'] = active['taps_search_FT30d']/active['expo_search_FT30d']
churn ['taps_front_LT30d' ] = churn ['taps_front_LT30d' ]/churn ['expo_front_LT30d' ]
churn ['taps_carou_LT30d' ] = churn ['taps_carou_LT30d' ]/churn ['expo_carou_LT30d' ]
churn ['taps_search_LT30d'] = churn ['taps_search_LT30d']/churn ['expo_search_LT30d']
active['taps_front_LT30d' ] = active['taps_front_LT30d' ]/active['expo_front_LT30d' ]
active['taps_carou_LT30d' ] = active['taps_carou_LT30d' ]/active['expo_carou_LT30d' ]
active['taps_search_LT30d'] = active['taps_search_LT30d']/active['expo_search_LT30d']
churn ['taps_front_LO30d' ] = churn ['taps_front_LO30d' ]/churn ['expo_front_LO30d' ]
churn ['taps_carou_LO30d' ] = churn ['taps_carou_LO30d' ]/churn ['expo_carou_LO30d' ]
churn ['taps_search_LO30d'] = churn ['taps_search_LO30d']/churn ['expo_search_LO30d']
active['taps_front_LO30d' ] = active['taps_front_LO30d' ]/active['expo_front_LO30d' ]
active['taps_carou_LO30d' ] = active['taps_carou_LO30d' ]/active['expo_carou_LO30d' ]
active['taps_search_LO30d'] = active['taps_search_LO30d']/active['expo_search_LO30d']
# menu to trip
churn ['S2Rm_FO30d'] = churn ['trips_FO30d']/churn ['taps_FO30d']
active['S2Rm_FO30d'] = active['trips_FO30d']/active['taps_FO30d']
churn ['S2Rm_FT30d'] = churn ['trips_FT30d']/churn ['taps_FT30d']
active['S2Rm_FT30d'] = active['trips_FT30d']/active['taps_FT30d']
churn ['S2Rm_LT30d'] = churn ['trips_LT30d']/churn ['taps_LT30d']
active['S2Rm_LT30d'] = active['trips_LT30d']/active['taps_LT30d']
churn ['S2Rm_LO30d'] = churn ['trips_LO30d']/churn ['taps_LO30d']
active['S2Rm_LO30d'] = active['trips_LO30d']/active['taps_LO30d']
# exposure to trip
churn ['S2Re_FO30d'] = churn ['trips_FO30d']/churn ['expo_FO30d']
active['S2Re_FO30d'] = active['trips_FO30d']/active['expo_FO30d']
churn ['S2Re_FT30d'] = churn ['trips_FT30d']/churn ['expo_FT30d']
active['S2Re_FT30d'] = active['trips_FT30d']/active['expo_FT30d']
churn ['S2Re_LT30d'] = churn ['trips_LT30d']/churn ['expo_LT30d']
active['S2Re_LT30d'] = active['trips_LT30d']/active['expo_LT30d']
churn ['S2Re_LO30d'] = churn ['trips_LO30d']/churn ['expo_LO30d']
active['S2Re_LO30d'] = active['trips_LO30d']/active['expo_LO30d']
# expo to menu
churn ['taps_FO30d'] = churn ['taps_FO30d']/churn ['expo_FO30d']
active['taps_FO30d'] = active['taps_FO30d']/active['expo_FO30d']
churn ['taps_FT30d'] = churn ['taps_FT30d']/churn ['expo_FT30d']
active['taps_FT30d'] = active['taps_FT30d']/active['expo_FT30d']
churn ['taps_LT30d'] = churn ['taps_LT30d']/churn ['expo_LT30d']
active['taps_LT30d'] = active['taps_LT30d']/active['expo_LT30d']
churn ['taps_LO30d'] = churn ['taps_LO30d']/churn ['expo_LO30d']
active['taps_LO30d'] = active['taps_LO30d']/active['expo_LO30d']
churn ['ob-ft'] = (churn ['ft_date'] - churn ['fo_date']).dt.days
active['ob-ft'] = (active['ft_date'] - active['fo_date']).dt.days
# active days not yet
# convert date to interger
churn ['fo_date'] =(churn ['fo_date'] - pd.to_datetime('1899-12-30')).dt.days
churn ['FA_date'] =(churn ['FA_date'] - pd.to_datetime('1899-12-30')).dt.days
churn ['ft_date'] =(churn ['ft_date'] - pd.to_datetime('1899-12-30')).dt.days
churn ['lo_date'] =(churn ['lo_date'] - pd.to_datetime('1899-12-30')).dt.days
churn ['lt_date'] =(churn ['lt_date'] - pd.to_datetime('1899-12-30')).dt.days
active['fo_date'] =(active['fo_date'] - pd.to_datetime('1899-12-30')).dt.days
active['FA_date'] =(active['FA_date'] - pd.to_datetime('1899-12-30')).dt.days
active['ft_date'] =(active['ft_date'] - pd.to_datetime('1899-12-30')).dt.days
active['lo_date'] =(active['lo_date'] - pd.to_datetime('1899-12-30')).dt.days
active['lt_date'] =(active['lt_date'] - pd.to_datetime('1899-12-30')).dt.days
######################################## Find Comapre Resto
compare = active.drop(['store_name', 'city', 'city_id2', 'district','cuisine', 'type'], axis=1)
col_ft = churn.columns.get_loc("ft_date")
col_tp = churn.columns.get_loc("trips_FT30d")
col_lg = churn.columns.get_loc("longitude")
col_la = churn.columns.get_loc("latitude")
ch_vs = pd.DataFrame(columns=compare.columns)
ch_vs['uuid']="" ; ch_vs['months']="" ; ch_vs['compareN']=""
for i in range(len(churn)):
tmp = compare[(compare['ft_date'] >= churn.iloc[i,col_ft] - 150) &
(compare['ft_date'] <= churn.iloc[i,col_ft] + 150) &
(compare['trips_FT30d'] >= churn.iloc[i,col_tp] * 0.5) &
(compare['trips_FT30d'] <= churn.iloc[i,col_tp] * 1.5) &
(compare['longitude'] >= churn.iloc[i,col_lg] - 5/101.77545) &
(compare['longitude'] <= churn.iloc[i,col_lg] + 5/101.77545) &
(compare['latitude' ] >= churn.iloc[i,col_la] - 5/110.9362) &
(compare['latitude' ] <= churn.iloc[i,col_la] + 5/110.9362)
]
ch_vs = ch_vs.append(pd.Series(tmp.mean(axis=0), name = churn.index[i][1]))
ch_vs.loc[churn.index[i][1],'months' ] = churn.index[i][0]
ch_vs.loc[churn.index[i][1],'uuid' ] = churn.index[i][1]
ch_vs.loc[churn.index[i][1],'compareN'] = len(tmp)
ch_vs.set_index(['months', 'uuid'], inplace=True)
tmp = churn.drop(['store_name','city','city_id2','district',
'cuisine','type','lost_date',
# 'lo_date','menu_LO30d','open_LO30d','trips_LO30d',
# 'onlineday_LO30d','posi_LO30d','load_LO30d','expo_front_LO30d',
# 'taps_front_LO30d','expo_carou_LO30d','taps_carou_LO30d',
# 'expo_search_LO30d','taps_search_LO30d'
], axis=1)
tmp.to_csv('churn_d.csv')
ch_vs.to_csv('compare.csv')