# Enhance pandas function

#### pd.pivot

改善 pd.pivot column name 沒有 prefix 的問題

```python
import pandas as pd


def my_pivot(data, index=None, columns=None, values=None, _suffix='', **kwargs):
    """Pivot ``data`` and flatten the result into prefixed column names.

    Wraps ``pd.pivot_table`` and renames every pivoted column to
    ``<columns label>_<category value><_suffix>``, then moves the index
    back into regular columns.

    Args:
        data: Source DataFrame.
        index: Column label (or list of labels) kept as row identifiers.
        columns: A single column label whose distinct values become the
            new columns.
        values: A single column label to aggregate.
        _suffix: Text appended to every generated column name.
        **kwargs: Forwarded to ``pd.pivot_table`` (e.g. ``aggfunc``,
            ``fill_value``).

    Returns:
        A new DataFrame holding the index columns followed by one column
        per category value.

    Raises:
        TypeError: If the pivot produces multi-level columns (several
            ``columns`` or ``values`` labels), which cannot be flattened
            into ``<label>_<value>`` names.
    """
    pivoted = pd.pivot_table(data, index=index, columns=columns, values=values, **kwargs)
    if isinstance(pivoted.columns, pd.MultiIndex):
        raise TypeError('my_pivot supports a single `columns` and a single `values` label')

    prefix = str(pivoted.columns.name)
    # Every pivoted column is renamed. Filtering the labels here would make
    # the new label list shorter than the frame and raise (and `in` on a
    # string index is a substring test). str() allows non-string categories.
    pivoted.columns = [(prefix + '_' + str(col) + _suffix).strip() for col in pivoted.columns]
    return pivoted.reset_index()
```

#### Calculate categorical variable value frequency (multiple columns)

原本用 groupby + value_counts 的話只能一行一行算

```python
def multi_value_counts(data, id_vars=None, columns=None, **kwargs):
    """Count how often each categorical value occurs per id, for several columns.

    For every column in ``columns`` the per-id frequency of each distinct
    value is computed and spread into ``<column>_<value>`` columns with
    ``my_pivot``; each result is left-joined onto the unique ids.

    Args:
        data: Source DataFrame.
        id_vars: Column label, or list/tuple of labels, identifying a group.
        columns: Column label, or iterable of labels, whose values are counted.
        **kwargs: Forwarded to ``my_pivot`` and from there to
            ``pd.pivot_table`` (e.g. ``fill_value=0``).

    Returns:
        A DataFrame with one row per unique id and one count column per
        (column, value) pair. Pairs that never occur for an id are NaN
        unless a ``fill_value`` is forwarded.
    """
    keys = list(id_vars) if isinstance(id_vars, (list, tuple)) else [id_vars]
    if isinstance(columns, str):
        # A bare string would otherwise be iterated character by character.
        columns = [columns]

    # Start from the unique ids so groups whose values are all missing
    # still appear in the result.
    result = data[keys].drop_duplicates()
    for col in columns:
        counts = data.groupby(keys)[col].value_counts().to_frame(name='n').reset_index()
        wide = my_pivot(counts, index=keys, columns=col, values='n', **kwargs)
        result = result.merge(wide, on=keys, how='left')
    return result
```

```python
def multi_unique_count(data, id_vars=None, columns=None, **kwargs):
    """Count the number of distinct values per id, for several columns.

    Args:
        data: Source DataFrame.
        id_vars: Column label, or list/tuple of labels, identifying a group.
        columns: Column label, or iterable of labels, to count distinct
            values of.
        **kwargs: Accepted for signature compatibility; currently unused.

    Returns:
        A DataFrame with one row per unique id and one ``<column>_n``
        column per requested column.
    """
    keys = list(id_vars) if isinstance(id_vars, (list, tuple)) else [id_vars]
    if isinstance(columns, str):
        # A bare string would otherwise be iterated character by character.
        columns = [columns]

    result = data[keys].drop_duplicates()
    for col in columns:
        name = str(col) + '_n'
        distinct = data.groupby(keys)[col].nunique().to_frame(name=name).reset_index()
        result = result.merge(distinct, on=keys, how='left')
    return result


def string2date(data, columns, _format='%Y-%m-%d'):
    """Parse string columns of ``data`` into datetimes, in place.

    Args:
        data: DataFrame to modify.
        columns: Column label, or iterable of labels, holding date strings.
        _format: ``strftime``-style format the strings are written in.

    Returns:
        The same ``data`` object, with the listed columns converted.
    """
    if isinstance(columns, str):
        columns = [columns]
    for col in columns:
        # Honour the caller's format instead of a hard-coded '%Y-%m-%d'.
        data[col] = pd.to_datetime(data[col], format=_format)
    return data
```