# Enhance pandas function
#### pd.pivot
改善 pd.pivot column name 沒有 prefix 的問題
```python
def my_pivot(data, index=None, columns=None, values=None, _suffix='', **kwargs):
    """Pivot ``data`` and give every resulting value column a readable name.

    Wraps ``pd.pivot_table`` and renames each pivoted column to
    ``<columns-axis name>_<category><_suffix>``, then moves the index back
    into ordinary columns.

    Args:
        data: Source DataFrame.
        index: Key(s) to group rows by, as accepted by ``pd.pivot_table``.
        columns: Key(s) whose values become the new columns.
        values: Column(s) to aggregate.
        _suffix: Text appended to every renamed column.
        **kwargs: Forwarded unchanged to ``pd.pivot_table``.

    Returns:
        A new DataFrame with the index keys as regular columns followed by
        the renamed pivot columns.
    """
    pivoted = pd.pivot_table(
        data, index=index, columns=columns, values=values, **kwargs
    )
    prefix = pivoted.columns.name

    def _label(col):
        # Multi-level column labels arrive as tuples; flatten them with '_'.
        parts = col if isinstance(col, tuple) else (col,)
        text = '_'.join(str(part) for part in parts)
        if prefix is not None:
            text = f'{prefix}_{text}'
        return f'{text}{_suffix}'.strip()

    # Rename every column. The index keys are not part of ``columns`` at this
    # point, so no filtering is needed (filtering would break the 1:1 mapping
    # between old and new labels).
    pivoted.columns = [_label(col) for col in pivoted.columns]
    return pivoted.reset_index()
```
#### Calculate categorical variable value frequency (multiple columns)
原本用 groupby + value_counts 的話只能一行一行算
```python
def multi_value_counts(data, id_vars=None, columns=None, **kwargs):
    """Count category frequencies per id group for several columns at once.

    For each column in ``columns`` the values are counted within every
    ``id_vars`` group, spread into one output column per distinct value via
    ``my_pivot``, and left-joined onto the distinct id combinations.

    Args:
        data: Source DataFrame.
        id_vars: A column label, or a list/tuple of labels, identifying a group.
        columns: A column label or an iterable of labels to count.
            ``None`` means no columns are counted.
        **kwargs: Forwarded to ``my_pivot`` (and from there to
            ``pd.pivot_table``).

    Returns:
        DataFrame with one row per distinct id combination, in order of
        first appearance, plus the pivoted count columns.
    """
    id_cols = list(id_vars) if isinstance(id_vars, (list, tuple)) else [id_vars]
    if columns is None:
        columns = []
    elif isinstance(columns, str):
        # A bare string would otherwise be iterated character by character.
        columns = [columns]

    result = data[id_cols].drop_duplicates()
    for col in columns:
        counts = (
            data.groupby(id_cols)[col]
            .value_counts()
            .to_frame(name='n')
            .reset_index()
        )
        wide = my_pivot(counts, index=id_cols, columns=col, values='n', **kwargs)
        result = result.merge(wide, on=id_cols, how='left')
    return result
```
```python
def multi_unique_count(data, id_vars=None, columns=None, **kwargs):
    """Count distinct values per id group for several columns at once.

    Each column in ``columns`` yields one output column named
    ``<column>_n`` holding the number of unique values in that group.

    Args:
        data: Source DataFrame.
        id_vars: A column label, or a list/tuple of labels, identifying a group.
        columns: A column label or an iterable of labels to summarise.
            ``None`` means no columns are summarised.
        **kwargs: Accepted for signature compatibility; not used.

    Returns:
        DataFrame with one row per distinct id combination, in order of
        first appearance, plus one ``<column>_n`` column per input column.
    """
    id_cols = list(id_vars) if isinstance(id_vars, (list, tuple)) else [id_vars]
    if columns is None:
        columns = []
    elif isinstance(columns, str):
        # A bare string would otherwise be iterated character by character.
        columns = [columns]

    result = data[id_cols].drop_duplicates()
    for col in columns:
        uniques = (
            data.groupby(id_cols)[col]
            .nunique()
            .to_frame(name=f'{col}_n')
            .reset_index()
        )
        result = result.merge(uniques, on=id_cols, how='left')
    return result
def string2date(data, columns, _format='%Y-%m-%d'):
    """Parse string columns of ``data`` into datetimes, in place.

    Args:
        data: DataFrame whose columns are converted; it is modified in place.
        columns: A column label or an iterable of labels to convert.
        _format: ``strptime`` format describing the strings in those columns.

    Returns:
        The same ``data`` object, with the listed columns converted.
    """
    if isinstance(columns, str):
        # A bare string would otherwise be iterated character by character.
        columns = [columns]
    for col in columns:
        # Honour the caller-supplied format instead of a hard-coded one.
        data[col] = data[col].apply(lambda text: datetime.strptime(text, _format))
    return data
```