# Data preprocessing
###### tags: `Code`
```python
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
# Ask TensorFlow which physical devices it can see; as the cell's last
# expression the returned list is displayed by the notebook.
tf.config.list_physical_devices()
```
```python
import pandas as pd
import numpy as np
from datetime import datetime
```
```python
# Directory that holds the competition files; every CSV path below is built from it.
data_folder = "./g-research-crypto-forecasting/"
# IPython shell escape (not plain Python): list the contents of that directory.
!ls $data_folder
```
asset_details.csv example_test.csv supplemental_train.csv
detailed-api-introduction.ipynb gresearch_crypto train.csv
example_sample_submission.csv submission.csv
```python
# Load the training table (all assets together) into memory.
crypto_df = pd.read_csv(data_folder + 'train.csv')
# Bare name as last expression: the notebook renders the frame.
crypto_df
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>timestamp</th>
<th>Asset_ID</th>
<th>Count</th>
<th>Open</th>
<th>High</th>
<th>Low</th>
<th>Close</th>
<th>Volume</th>
<th>VWAP</th>
<th>Target</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>1514764860</td>
<td>2</td>
<td>40.0</td>
<td>2376.580000</td>
<td>2399.500000</td>
<td>2357.140000</td>
<td>2374.590000</td>
<td>1.923301e+01</td>
<td>2373.116392</td>
<td>-0.004218</td>
</tr>
<tr>
<th>1</th>
<td>1514764860</td>
<td>0</td>
<td>5.0</td>
<td>8.530000</td>
<td>8.530000</td>
<td>8.530000</td>
<td>8.530000</td>
<td>7.838000e+01</td>
<td>8.530000</td>
<td>-0.014399</td>
</tr>
<tr>
<th>2</th>
<td>1514764860</td>
<td>1</td>
<td>229.0</td>
<td>13835.194000</td>
<td>14013.800000</td>
<td>13666.110000</td>
<td>13850.176000</td>
<td>3.155006e+01</td>
<td>13827.062093</td>
<td>-0.014643</td>
</tr>
<tr>
<th>3</th>
<td>1514764860</td>
<td>5</td>
<td>32.0</td>
<td>7.659600</td>
<td>7.659600</td>
<td>7.656700</td>
<td>7.657600</td>
<td>6.626713e+03</td>
<td>7.657713</td>
<td>-0.013922</td>
</tr>
<tr>
<th>4</th>
<td>1514764860</td>
<td>7</td>
<td>5.0</td>
<td>25.920000</td>
<td>25.920000</td>
<td>25.874000</td>
<td>25.877000</td>
<td>1.210873e+02</td>
<td>25.891363</td>
<td>-0.008264</td>
</tr>
<tr>
<th>...</th>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<th>24236801</th>
<td>1632182400</td>
<td>9</td>
<td>775.0</td>
<td>157.181571</td>
<td>157.250000</td>
<td>156.700000</td>
<td>156.943857</td>
<td>4.663725e+03</td>
<td>156.994319</td>
<td>NaN</td>
</tr>
<tr>
<th>24236802</th>
<td>1632182400</td>
<td>10</td>
<td>34.0</td>
<td>2437.065067</td>
<td>2438.000000</td>
<td>2430.226900</td>
<td>2432.907467</td>
<td>3.975460e+00</td>
<td>2434.818747</td>
<td>NaN</td>
</tr>
<tr>
<th>24236803</th>
<td>1632182400</td>
<td>13</td>
<td>380.0</td>
<td>0.091390</td>
<td>0.091527</td>
<td>0.091260</td>
<td>0.091349</td>
<td>2.193732e+06</td>
<td>0.091388</td>
<td>NaN</td>
</tr>
<tr>
<th>24236804</th>
<td>1632182400</td>
<td>12</td>
<td>177.0</td>
<td>0.282168</td>
<td>0.282438</td>
<td>0.281842</td>
<td>0.282051</td>
<td>1.828508e+05</td>
<td>0.282134</td>
<td>NaN</td>
</tr>
<tr>
<th>24236805</th>
<td>1632182400</td>
<td>11</td>
<td>48.0</td>
<td>232.695000</td>
<td>232.800000</td>
<td>232.240000</td>
<td>232.275000</td>
<td>1.035123e+02</td>
<td>232.569697</td>
<td>NaN</td>
</tr>
</tbody>
</table>
<p>24236806 rows × 10 columns</p>
</div>
```python
# Per-asset metadata, indexed by Asset_ID so rows can be looked up with
# asset_details.loc[asset_id, ...].
asset_details = pd.read_csv(data_folder + 'asset_details.csv').set_index("Asset_ID")
# Bare name as last expression: the notebook renders the frame.
asset_details
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>Weight</th>
<th>Asset_Name</th>
</tr>
<tr>
<th>Asset_ID</th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th>2</th>
<td>2.397895</td>
<td>Bitcoin Cash</td>
</tr>
<tr>
<th>0</th>
<td>4.304065</td>
<td>Binance Coin</td>
</tr>
<tr>
<th>1</th>
<td>6.779922</td>
<td>Bitcoin</td>
</tr>
<tr>
<th>5</th>
<td>1.386294</td>
<td>EOS.IO</td>
</tr>
<tr>
<th>7</th>
<td>2.079442</td>
<td>Ethereum Classic</td>
</tr>
<tr>
<th>6</th>
<td>5.894403</td>
<td>Ethereum</td>
</tr>
<tr>
<th>9</th>
<td>2.397895</td>
<td>Litecoin</td>
</tr>
<tr>
<th>11</th>
<td>1.609438</td>
<td>Monero</td>
</tr>
<tr>
<th>13</th>
<td>1.791759</td>
<td>TRON</td>
</tr>
<tr>
<th>12</th>
<td>2.079442</td>
<td>Stellar</td>
</tr>
<tr>
<th>3</th>
<td>4.406719</td>
<td>Cardano</td>
</tr>
<tr>
<th>8</th>
<td>1.098612</td>
<td>IOTA</td>
</tr>
<tr>
<th>10</th>
<td>1.098612</td>
<td>Maker</td>
</tr>
<tr>
<th>4</th>
<td>3.555348</td>
<td>Dogecoin</td>
</tr>
</tbody>
</table>
</div>
```python
# Bitcoin rows only (Asset_ID 1), keyed by their timestamp.
btc = crypto_df.loc[crypto_df["Asset_ID"].eq(1)].set_index("timestamp")
# Show the 20 most recent rows.
btc.tail(20)
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>Asset_ID</th>
<th>Count</th>
<th>Open</th>
<th>High</th>
<th>Low</th>
<th>Close</th>
<th>Volume</th>
<th>VWAP</th>
<th>Target</th>
</tr>
<tr>
<th>timestamp</th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th>1632181260</th>
<td>1</td>
<td>2281.0</td>
<td>42718.815000</td>
<td>42819.380000</td>
<td>42690.84</td>
<td>42781.970571</td>
<td>76.339988</td>
<td>42755.785162</td>
<td>0.003246</td>
</tr>
<tr>
<th>1632181320</th>
<td>1</td>
<td>2642.0</td>
<td>42772.921250</td>
<td>42827.100000</td>
<td>42690.75</td>
<td>42755.592500</td>
<td>117.429123</td>
<td>42749.075916</td>
<td>0.003108</td>
</tr>
<tr>
<th>1632181380</th>
<td>1</td>
<td>2134.0</td>
<td>42762.290000</td>
<td>42811.300000</td>
<td>42694.37</td>
<td>42717.234286</td>
<td>78.049458</td>
<td>42749.024591</td>
<td>0.002770</td>
</tr>
<tr>
<th>1632181440</th>
<td>1</td>
<td>2165.0</td>
<td>42703.802500</td>
<td>42752.000000</td>
<td>42636.76</td>
<td>42657.202500</td>
<td>86.518794</td>
<td>42680.263993</td>
<td>0.002726</td>
</tr>
<tr>
<th>1632181500</th>
<td>1</td>
<td>2177.0</td>
<td>42655.528484</td>
<td>42715.990000</td>
<td>42630.17</td>
<td>42678.721429</td>
<td>93.523647</td>
<td>42679.036829</td>
<td>NaN</td>
</tr>
<tr>
<th>1632181560</th>
<td>1</td>
<td>2352.0</td>
<td>42688.935000</td>
<td>42810.260000</td>
<td>42662.27</td>
<td>42778.291250</td>
<td>97.442472</td>
<td>42739.891567</td>
<td>NaN</td>
</tr>
<tr>
<th>1632181620</th>
<td>1</td>
<td>2080.0</td>
<td>42790.287143</td>
<td>42828.000000</td>
<td>42742.36</td>
<td>42790.175714</td>
<td>97.634234</td>
<td>42794.666167</td>
<td>NaN</td>
</tr>
<tr>
<th>1632181680</th>
<td>1</td>
<td>2058.0</td>
<td>42792.855714</td>
<td>42827.700000</td>
<td>42713.48</td>
<td>42731.731429</td>
<td>78.468814</td>
<td>42786.068940</td>
<td>NaN</td>
</tr>
<tr>
<th>1632181740</th>
<td>1</td>
<td>3717.0</td>
<td>42745.153750</td>
<td>42937.310000</td>
<td>42713.48</td>
<td>42898.565000</td>
<td>116.567006</td>
<td>42828.789861</td>
<td>NaN</td>
</tr>
<tr>
<th>1632181800</th>
<td>1</td>
<td>3253.0</td>
<td>42896.174286</td>
<td>42966.980000</td>
<td>42852.07</td>
<td>42939.645714</td>
<td>113.220529</td>
<td>42910.257438</td>
<td>NaN</td>
</tr>
<tr>
<th>1632181860</th>
<td>1</td>
<td>2264.0</td>
<td>42937.784286</td>
<td>42971.950000</td>
<td>42899.10</td>
<td>42927.167143</td>
<td>56.655507</td>
<td>42932.332787</td>
<td>NaN</td>
</tr>
<tr>
<th>1632181920</th>
<td>1</td>
<td>2824.0</td>
<td>42933.254907</td>
<td>43014.980000</td>
<td>42907.18</td>
<td>42979.833750</td>
<td>97.404745</td>
<td>42963.462903</td>
<td>NaN</td>
</tr>
<tr>
<th>1632181980</th>
<td>1</td>
<td>1697.0</td>
<td>42974.108693</td>
<td>43005.650000</td>
<td>42929.74</td>
<td>42960.601429</td>
<td>58.419459</td>
<td>42963.080444</td>
<td>NaN</td>
</tr>
<tr>
<th>1632182040</th>
<td>1</td>
<td>1439.0</td>
<td>42971.347143</td>
<td>43008.850000</td>
<td>42953.10</td>
<td>42985.482857</td>
<td>39.329753</td>
<td>42981.323152</td>
<td>NaN</td>
</tr>
<tr>
<th>1632182100</th>
<td>1</td>
<td>1608.0</td>
<td>42982.915714</td>
<td>43006.540000</td>
<td>42958.06</td>
<td>42985.214824</td>
<td>47.701279</td>
<td>42983.752149</td>
<td>NaN</td>
</tr>
<tr>
<th>1632182160</th>
<td>1</td>
<td>1940.0</td>
<td>42983.780000</td>
<td>43001.850849</td>
<td>42878.26</td>
<td>42899.012857</td>
<td>56.850913</td>
<td>42935.489499</td>
<td>NaN</td>
</tr>
<tr>
<th>1632182220</th>
<td>1</td>
<td>2026.0</td>
<td>42904.197143</td>
<td>42932.000000</td>
<td>42840.16</td>
<td>42860.005714</td>
<td>80.993326</td>
<td>42879.576084</td>
<td>NaN</td>
</tr>
<tr>
<th>1632182280</th>
<td>1</td>
<td>1986.0</td>
<td>42859.385714</td>
<td>42887.500000</td>
<td>42797.20</td>
<td>42827.020000</td>
<td>65.677734</td>
<td>42844.090693</td>
<td>NaN</td>
</tr>
<tr>
<th>1632182340</th>
<td>1</td>
<td>4047.0</td>
<td>42839.012802</td>
<td>43042.160000</td>
<td>42818.10</td>
<td>43017.277143</td>
<td>138.335477</td>
<td>42935.761938</td>
<td>NaN</td>
</tr>
<tr>
<th>1632182400</th>
<td>1</td>
<td>2698.0</td>
<td>43009.961250</td>
<td>43048.510000</td>
<td>42961.64</td>
<td>43002.505000</td>
<td>128.206820</td>
<td>43011.414052</td>
<td>NaN</td>
</tr>
</tbody>
</table>
</div>
```python
# Ethereum rows only (Asset_ID 6), keyed by their timestamp.
eth = crypto_df.loc[crypto_df["Asset_ID"].eq(6)].set_index("timestamp")
# Small Bitcoin sample: the 200 most recent rows.
btc_mini = btc.tail(200)
btc_mini
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>Asset_ID</th>
<th>Count</th>
<th>Open</th>
<th>High</th>
<th>Low</th>
<th>Close</th>
<th>Volume</th>
<th>VWAP</th>
<th>Target</th>
</tr>
<tr>
<th>timestamp</th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th>1632170460</th>
<td>1</td>
<td>5694.0</td>
<td>43558.393750</td>
<td>43588.680000</td>
<td>43427.28</td>
<td>43447.003750</td>
<td>191.442881</td>
<td>43493.981877</td>
<td>-0.000805</td>
</tr>
<tr>
<th>1632170520</th>
<td>1</td>
<td>2528.0</td>
<td>43449.818571</td>
<td>43495.954028</td>
<td>43414.33</td>
<td>43440.000000</td>
<td>80.692018</td>
<td>43455.988146</td>
<td>0.000418</td>
</tr>
<tr>
<th>1632170580</th>
<td>1</td>
<td>3985.0</td>
<td>43427.082310</td>
<td>43455.218478</td>
<td>43327.29</td>
<td>43360.723750</td>
<td>117.332513</td>
<td>43391.956795</td>
<td>0.000125</td>
</tr>
<tr>
<th>1632170640</th>
<td>1</td>
<td>2185.0</td>
<td>43362.572857</td>
<td>43454.990000</td>
<td>43348.06</td>
<td>43427.242857</td>
<td>76.361478</td>
<td>43403.880056</td>
<td>0.000956</td>
</tr>
<tr>
<th>1632170700</th>
<td>1</td>
<td>2623.0</td>
<td>43419.832500</td>
<td>43467.000000</td>
<td>43351.15</td>
<td>43375.040000</td>
<td>76.018320</td>
<td>43408.267118</td>
<td>-0.000430</td>
</tr>
<tr>
<th>...</th>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<th>1632182160</th>
<td>1</td>
<td>1940.0</td>
<td>42983.780000</td>
<td>43001.850849</td>
<td>42878.26</td>
<td>42899.012857</td>
<td>56.850913</td>
<td>42935.489499</td>
<td>NaN</td>
</tr>
<tr>
<th>1632182220</th>
<td>1</td>
<td>2026.0</td>
<td>42904.197143</td>
<td>42932.000000</td>
<td>42840.16</td>
<td>42860.005714</td>
<td>80.993326</td>
<td>42879.576084</td>
<td>NaN</td>
</tr>
<tr>
<th>1632182280</th>
<td>1</td>
<td>1986.0</td>
<td>42859.385714</td>
<td>42887.500000</td>
<td>42797.20</td>
<td>42827.020000</td>
<td>65.677734</td>
<td>42844.090693</td>
<td>NaN</td>
</tr>
<tr>
<th>1632182340</th>
<td>1</td>
<td>4047.0</td>
<td>42839.012802</td>
<td>43042.160000</td>
<td>42818.10</td>
<td>43017.277143</td>
<td>138.335477</td>
<td>42935.761938</td>
<td>NaN</td>
</tr>
<tr>
<th>1632182400</th>
<td>1</td>
<td>2698.0</td>
<td>43009.961250</td>
<td>43048.510000</td>
<td>42961.64</td>
<td>43002.505000</td>
<td>128.206820</td>
<td>43011.414052</td>
<td>NaN</td>
</tr>
</tbody>
</table>
<p>200 rows × 9 columns</p>
</div>
```python
# Ethereum rows only (Asset_ID 6), keyed by their timestamp.
eth = crypto_df[crypto_df["Asset_ID"]==6].set_index("timestamp") # Asset_ID = 6 for Ethereum
# Print column dtypes together with per-column non-null counts.
eth.info(show_counts =True)
```
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1956200 entries, 1514764860 to 1632182400
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Asset_ID 1956200 non-null int64
1 Count 1956200 non-null float64
2 Open 1956200 non-null float64
3 High 1956200 non-null float64
4 Low 1956200 non-null float64
5 Close 1956200 non-null float64
6 Volume 1956200 non-null float64
7 VWAP 1956200 non-null float64
8 Target 1955860 non-null float64
dtypes: float64(8), int64(1)
memory usage: 149.2 MB
```python
```
```python
# Read the first and last index value of each series as a second-resolution
# datetime64 and report the covered range.
beg_btc, end_btc = (btc.index[pos].astype('datetime64[s]') for pos in (0, -1))
beg_eth, end_eth = (eth.index[pos].astype('datetime64[s]') for pos in (0, -1))
print('BTC data goes from ', beg_btc, 'to ', end_btc)
print('Ethereum data goes from ', beg_eth, 'to ', end_eth)
```
BTC data goes from 2018-01-01T00:01:00 to 2021-09-21T00:00:00
Ethereum data goes from 2018-01-01T00:01:00 to 2021-09-21T00:00:00
```python
# Build one regular, gap-free series per asset.
# A single groupby pass replaces filtering the whole table once per asset.
crypto_coins = {}
for coin_id, coin in crypto_df.groupby("Asset_ID"):
    coin = coin.set_index("timestamp")
    # Every timestamp from the first to the last observed one, 60 apart;
    # the +60 keeps the last observed timestamp inside the half-open range.
    minute_grid = range(coin.index[0], coin.index[-1] + 60, 60)
    # Timestamps missing from the data take the values of the previous row.
    # NOTE(review): this also copies Target into the gaps — confirm that is intended.
    coin = coin.reindex(minute_grid, method='pad')
    # Keep the last 100000 rows; .iloc makes the positional slice explicit.
    # int() keeps the dictionary keys plain Python ints.
    crypto_coins[int(coin_id)] = coin.iloc[-100000:]
```
```python
```
```python
# Sanity check: tabulate the differences between consecutive index values of
# each asset (a single value of 60 means the grid has no gaps).
for coin_id, coin in crypto_coins.items():
    steps = coin.index[1:] - coin.index[:-1]
    count = steps.value_counts(sort=True)
    print("\n", coin_id)
    print(asset_details.loc[coin_id, "Asset_Name"])
    print(count)
```
0
Binance Coin
60 99999
Name: timestamp, dtype: int64
1
Bitcoin
60 99999
Name: timestamp, dtype: int64
2
Bitcoin Cash
60 99999
Name: timestamp, dtype: int64
3
Cardano
60 99999
Name: timestamp, dtype: int64
4
Dogecoin
60 99999
Name: timestamp, dtype: int64
5
EOS.IO
60 99999
Name: timestamp, dtype: int64
6
Ethereum
60 99999
Name: timestamp, dtype: int64
7
Ethereum Classic
60 99999
Name: timestamp, dtype: int64
8
IOTA
60 99999
Name: timestamp, dtype: int64
9
Litecoin
60 99999
Name: timestamp, dtype: int64
10
Maker
60 99999
Name: timestamp, dtype: int64
11
Monero
60 99999
Name: timestamp, dtype: int64
12
Stellar
60 99999
Name: timestamp, dtype: int64
13
TRON
60 99999
Name: timestamp, dtype: int64
```python
# Report, per asset, how many missing values each column still contains.
for coin_id, coin in crypto_coins.items():
    count = coin.isnull().sum()
    print("\n", coin_id, asset_details.loc[coin_id, "Asset_Name"])
    print(count)
```
0 Binance Coin
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 44
dtype: int64
1 Bitcoin
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 16
dtype: int64
2 Bitcoin Cash
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 16
dtype: int64
3 Cardano
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 16
dtype: int64
4 Dogecoin
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 16
dtype: int64
5 EOS.IO
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 16
dtype: int64
6 Ethereum
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 16
dtype: int64
7 Ethereum Classic
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 16
dtype: int64
8 IOTA
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 344
dtype: int64
9 Litecoin
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 16
dtype: int64
10 Maker
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 834
dtype: int64
11 Monero
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 107
dtype: int64
12 Stellar
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 16
dtype: int64
13 TRON
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 29
dtype: int64
```python
# Forward-fill the remaining NaNs of every asset, then re-count them to
# confirm none are left.
for coin_id in range(14):
    # DataFrame.ffill() replaces fillna(method="ffill"), which is deprecated
    # in current pandas releases; the result is the same.
    # NOTE(review): this fills the missing Target values (including the
    # trailing rows) with the last known Target — confirm that is acceptable
    # for training labels.
    crypto_coins[coin_id] = crypto_coins[coin_id].ffill()
    coin = crypto_coins[coin_id]
    count = coin.isna().sum()
    print("\n", coin_id, asset_details.loc[coin_id, "Asset_Name"])
    print(count)
```
0 Binance Coin
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
1 Bitcoin
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
2 Bitcoin Cash
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
3 Cardano
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
4 Dogecoin
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
5 EOS.IO
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
6 Ethereum
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
7 Ethereum Classic
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
8 IOTA
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
9 Litecoin
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
10 Maker
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
11 Monero
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
12 Stellar
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
13 TRON
Asset_ID 0
Count 0
Open 0
High 0
Low 0
Close 0
Volume 0
VWAP 0
Target 0
dtype: int64
```python
# Shape of asset 0's rows whose timestamp index is 1632181500 or later.
crypto_coins[0].loc[1632181500:].shape
```
(16, 9)
```python
def series_split(x, n_steps):
    """Cut a 2-D series into overlapping windows and one-step-shifted targets.

    Args:
        x: array-like of shape (n_rows, dim); rows are consecutive time steps.
        n_steps: number of rows in each window.

    Returns:
        Tuple ``(x_train, y_train)`` of float64 arrays, each of shape
        ``(n_rows - n_steps, n_steps, dim)``. ``x_train[j]`` holds rows
        ``j .. j + n_steps - 1`` and ``y_train[j]`` holds rows
        ``j + 1 .. j + n_steps`` (the same window moved one step ahead).

    Raises:
        ValueError: if ``x`` is not 2-D, or ``n_steps`` is negative or larger
            than the number of rows.
    """
    # Accept lists / DataFrames as well as ndarrays; the result was always float64.
    x = np.asarray(x, dtype=np.float64)
    if x.ndim != 2:
        raise ValueError(f"x must be 2-D (rows, features), got shape {x.shape}")
    print(x.shape)
    n_rows = x.shape[0]
    if n_steps < 0 or n_steps > n_rows:
        raise ValueError(
            f"n_steps must be between 0 and the number of rows ({n_rows}), got {n_steps}"
        )
    batch_size = n_rows - n_steps
    # window_idx[j, k] == j + k: row k of window j. One fancy-indexing call
    # then gathers every window at once instead of copying them in a Python loop.
    window_idx = np.arange(batch_size)[:, None] + np.arange(n_steps)[None, :]
    x_train = x[window_idx]
    y_train = x[window_idx + 1]
    return x_train, y_train
# Standardize asset 0 column by column, then cut it into 60-step windows.
# NOTE(review): the scaler is fitted on the whole series, including the part
# later used for validation — confirm that is acceptable.
scaler = StandardScaler()
scaler.fit(crypto_coins[0])
data = scaler.transform(crypto_coins[0])
x_train, y_train = series_split(data, 60)
x_train.shape
```
(100000, 9)
(99940, 60, 9)
```python
# Size of the last axis of x_train; used below as the model's per-step input
# width and as the width of its output layer.
dim = x_train.shape[2]
```
```python
class LNSimpleRNNCell(keras.layers.Layer):
    """Simple RNN cell that layer-normalizes its output before the activation.

    The wrapped ``SimpleRNNCell`` runs without an activation; its output goes
    through ``LayerNormalization`` and then through ``activation``. The result
    is used both as the step output and as the next state.

    Args:
        units: size of the cell's state and of its output.
        activation: name or callable resolved through ``keras.activations.get``.
        **kwargs: forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, units, activation="tanh", **kwargs):
        super().__init__(**kwargs)
        self.state_size = units
        self.output_size = units
        # No activation here: it is applied after normalization in call().
        self.simple_rnn_cell = keras.layers.SimpleRNNCell(units, activation=None)
        # Fully qualified: a bare LayerNormalization is not imported in this file.
        self.layer_norm = keras.layers.LayerNormalization()
        self.activation = keras.activations.get(activation)

    def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
        """Return a one-element list holding an all-zero (batch_size, units) state.

        When ``inputs`` is given, the batch size and dtype are taken from it.
        """
        if inputs is not None:
            batch_size = tf.shape(inputs)[0]
            dtype = inputs.dtype
        return [tf.zeros([batch_size, self.state_size], dtype=dtype)]

    def call(self, inputs, states):
        """Run one step; return ``(output, [new_state])`` where both are the
        normalized, activated output of the wrapped cell."""
        # The wrapped cell's own new state is discarded on purpose: the
        # normalized output replaces it.
        outputs, _ = self.simple_rnn_cell(inputs, states)
        norm_outputs = self.activation(self.layer_norm(outputs))
        return norm_outputs, [norm_outputs]
# Two stacked LSTM layers with dropout, followed by a per-time-step dense
# layer that maps back to `dim` values.
model = keras.models.Sequential()
model.add(keras.layers.LSTM(256, return_sequences=True, input_shape=[None, dim]))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.LSTM(256, return_sequences=True))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.TimeDistributed(keras.layers.Dense(dim)))

# Upper bound on epochs; early stopping on val_loss ends training in practice.
epoch = 10000
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=4, mode='auto', restore_best_weights=True
)
opt = tf.keras.optimizers.Adam(learning_rate=0.01)

model.compile(loss="mse", optimizer=opt)
history = model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    epochs=epoch,
    callbacks=[callback],
)
```
2022-01-07 13:17:07.346509: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2022-01-07 13:17:07.367970: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 1699915000 Hz
2022-01-07 13:17:07.368581: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x44e2690 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-01-07 13:17:07.368609: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version
2022-01-07 13:17:07.532610: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x3b8f330 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2022-01-07 13:17:07.532658: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): GeForce GTX 1080, Compute Capability 6.1
2022-01-07 13:17:07.536987: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:04:00.0 name: GeForce GTX 1080 computeCapability: 6.1
coreClock: 1.7335GHz coreCount: 20 deviceMemorySize: 7.93GiB deviceMemoryBandwidth: 298.32GiB/s
2022-01-07 13:17:07.537138: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2022-01-07 13:17:07.537170: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2022-01-07 13:17:07.537275: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2022-01-07 13:17:07.537317: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2022-01-07 13:17:07.537356: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2022-01-07 13:17:07.537393: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2022-01-07 13:17:07.537417: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2022-01-07 13:17:07.541092: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2022-01-07 13:17:07.541163: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2022-01-07 13:17:07.869887: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
2022-01-07 13:17:07.869954: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0
2022-01-07 13:17:07.869968: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N
2022-01-07 13:17:07.873333: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 7568 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080, pci bus id: 0000:04:00.0, compute capability: 6.1)
Train on 79952 samples, validate on 19988 samples
Epoch 1/10000
2022-01-07 13:17:13.516412: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2022-01-07 13:17:14.141337: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
79952/79952 [==============================] - 43s 533us/sample - loss: 0.1120 - val_loss: 0.1723
Epoch 2/10000
79952/79952 [==============================] - 36s 449us/sample - loss: 0.0724 - val_loss: 0.1773
Epoch 3/10000
79952/79952 [==============================] - 37s 460us/sample - loss: 0.0608 - val_loss: 0.1799
Epoch 4/10000
79952/79952 [==============================] - 36s 450us/sample - loss: 0.0546 - val_loss: 0.1807
Epoch 5/10000
79952/79952 [==============================] - 37s 463us/sample - loss: 0.1299 - val_loss: 0.2197
```python
class LNSimpleRNNCell(keras.layers.Layer):
    """Simple RNN cell that layer-normalizes its output before the activation.

    The wrapped ``SimpleRNNCell`` runs without an activation; its output goes
    through ``LayerNormalization`` and then through ``activation``. The result
    is used both as the step output and as the next state.

    Args:
        units: size of the cell's state and of its output.
        activation: name or callable resolved through ``keras.activations.get``.
        **kwargs: forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, units, activation="tanh", **kwargs):
        super().__init__(**kwargs)
        self.state_size = units
        self.output_size = units
        # No activation here: it is applied after normalization in call().
        self.simple_rnn_cell = keras.layers.SimpleRNNCell(units, activation=None)
        # Fully qualified: a bare LayerNormalization is not imported in this file.
        self.layer_norm = keras.layers.LayerNormalization()
        self.activation = keras.activations.get(activation)

    def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
        """Return a one-element list holding an all-zero (batch_size, units) state.

        When ``inputs`` is given, the batch size and dtype are taken from it.
        """
        if inputs is not None:
            batch_size = tf.shape(inputs)[0]
            dtype = inputs.dtype
        return [tf.zeros([batch_size, self.state_size], dtype=dtype)]

    def call(self, inputs, states):
        """Run one step; return ``(output, [new_state])`` where both are the
        normalized, activated output of the wrapped cell."""
        # The wrapped cell's own new state is discarded on purpose: the
        # normalized output replaces it.
        outputs, _ = self.simple_rnn_cell(inputs, states)
        norm_outputs = self.activation(self.layer_norm(outputs))
        return norm_outputs, [norm_outputs]
# Two stacked LSTM layers with dropout, followed by a per-time-step dense
# layer that maps back to `dim` values.
model = keras.models.Sequential()
model.add(keras.layers.LSTM(256, return_sequences=True, input_shape=[None, dim]))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.LSTM(256, return_sequences=True))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.TimeDistributed(keras.layers.Dense(dim)))

# Upper bound on epochs; early stopping on val_loss ends training in practice.
epoch = 10000
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=4, mode='auto', restore_best_weights=True
)
opt = tf.keras.optimizers.Adam(learning_rate=0.01)

model.compile(loss="mse", optimizer=opt)
history = model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    epochs=epoch,
    callbacks=[callback],
)
```
2022-01-07 11:07:23.270573: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 57594240 exceeds 10% of system memory.
2022-01-07 11:07:23.363853: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 57594240 exceeds 10% of system memory.
Train on 79992 samples, validate on 19998 samples
Epoch 1/10000
79992/79992 [==============================] - 30s 369us/sample - loss: 0.1469 - val_loss: 0.1676
Epoch 2/10000
79992/79992 [==============================] - 25s 314us/sample - loss: 0.1339 - val_loss: 0.1737
Epoch 3/10000
79992/79992 [==============================] - 25s 319us/sample - loss: 0.1291 - val_loss: 0.1768
Epoch 4/10000
79992/79992 [==============================] - 25s 318us/sample - loss: 0.1231 - val_loss: 0.1768
Epoch 5/10000
79992/79992 [==============================] - 25s 316us/sample - loss: 0.1190 - val_loss: 0.1748
```python
# Module shipped with the competition data (it appears in the data folder listing).
import gresearch_crypto
# Create the submission environment and get the iterator that the
# prediction loop consumes.
# NOTE(review): presumably make_env() may only be called once per session — confirm.
env = gresearch_crypto.make_env()
iter_test = env.iter_test()
```
```python
# Walk through the test batches and submit a constant placeholder prediction
# (Target = 0) for every row, printing each batch along the way.
for test_df, sample_prediction_df in iter_test:
    print(test_df)
    sample_prediction_df['Target'] = 0
    print(sample_prediction_df)
    print(sample_prediction_df['Target'].dtype)
    env.predict(sample_prediction_df)
```
This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
timestamp Asset_ID Count Open High Low \
0 1623542400 3 1201 1.478556 1.486030 1.478000
1 1623542400 2 1020 580.306667 583.890000 579.910000
2 1623542400 0 626 343.789500 345.108000 343.640000
3 1623542400 1 2888 35554.289632 35652.464650 35502.670000
4 1623542400 4 433 0.312167 0.312600 0.311920
5 1623542400 5 359 4.832550 4.845900 4.822900
6 1623542400 7 541 55.223080 55.494000 55.182000
7 1623542400 6 2186 2371.194286 2379.200000 2369.670000
8 1623542400 8 35 1.003150 1.019800 0.987300
9 1623542400 9 560 161.933429 162.480000 161.730000
10 1623542400 10 61 2939.862750 2952.160000 2936.230000
11 1623542400 13 229 0.068132 0.068240 0.068038
12 1623542400 12 383 0.327973 0.329272 0.327650
13 1623542400 11 123 243.137500 243.810000 242.960000
Close Volume VWAP row_id
0 1.483681 6.547996e+05 1.481439 0
1 582.276667 1.227988e+03 581.697038 1
2 344.598000 1.718833e+03 344.441729 2
3 35602.004286 1.638115e+02 35583.469303 3
4 0.312208 5.855774e+05 0.312154 4
5 4.837583 4.714355e+04 4.836607 5
6 55.344680 6.625202e+03 55.298816 6
7 2374.380714 1.214129e+03 2374.335307 7
8 1.003300 7.061928e+03 1.002936 8
9 162.214714 1.485009e+03 162.231310 9
10 2947.078025 9.584785e+00 2945.110614 10
11 0.068158 3.046438e+06 0.068158 11
12 0.328829 5.364911e+05 0.328582 12
13 243.532500 3.079589e+02 243.452697 13
row_id Target
0 0 0
1 1 0
2 2 0
3 3 0
4 4 0
5 5 0
6 6 0
7 7 0
8 8 0
9 9 0
10 10 0
11 11 0
12 12 0
13 13 0
int64
timestamp Asset_ID Count Open High Low \
0 1623542460 3 672 1.482410 1.483759 1.47920
1 1623542460 2 1251 581.800000 585.590000 580.38000
2 1623542460 0 458 344.353500 344.790000 343.62000
3 1623542460 1 2006 35596.771429 35621.000000 35533.38000
4 1623542460 4 573 0.312274 0.312400 0.31154
5 1623542460 5 644 4.834550 4.849500 4.81850
6 1623542460 7 530 55.320900 55.426600 55.14900
7 1623542460 6 1261 2373.970101 2375.350000 2369.37000
8 1623542460 8 98 1.003050 1.019600 0.98410
9 1623542460 9 381 162.141857 162.380000 161.80000
10 1623542460 10 35 2946.370567 2949.720000 2943.71000
11 1623542460 13 395 0.068144 0.068334 0.06804
12 1623542460 12 287 0.328656 0.329280 0.32778
13 1623542460 11 62 243.368000 243.500000 242.94000
Close Volume VWAP row_id
0 1.482043 2.858286e+05 1.481495 14
1 582.358333 1.405285e+03 583.451389 15
2 344.089500 1.217352e+03 344.188716 16
3 35555.397143 9.336366e+01 35584.861196 17
4 0.311847 9.396433e+05 0.311930 18
5 4.838300 8.678307e+04 4.837430 19
6 55.286400 6.929458e+03 55.265491 20
7 2371.790000 7.867385e+02 2372.809830 21
8 1.001550 4.771951e+04 1.001078 22
9 162.193000 1.344810e+03 162.121310 23
10 2945.711900 2.794134e+00 2946.633163 24
11 0.068217 4.981365e+06 0.068201 25
12 0.328563 4.496390e+05 0.328548 26
13 243.330000 1.435053e+02 243.337398 27
row_id Target
0 14 0
1 15 0
2 16 0
3 17 0
4 18 0
5 19 0
6 20 0
7 21 0
8 22 0
9 23 0
10 24 0
11 25 0
12 26 0
13 27 0
int64
timestamp Asset_ID Count Open High Low \
0 1623542520 3 849 1.481493 1.482896 1.477801
1 1623542520 2 540 582.100000 582.670000 579.620000
2 1623542520 0 535 343.778000 344.060000 342.500000
3 1623542520 1 3531 35550.271250 35576.590000 35402.870000
4 1623542520 4 1667 0.311670 0.312000 0.310430
5 1623542520 5 380 4.833440 4.835700 4.814300
6 1623542520 7 825 55.225700 55.309600 54.910000
7 1623542520 6 1856 2370.880011 2371.950076 2363.000000
8 1623542520 8 53 1.000450 1.016400 0.981700
9 1623542520 9 439 161.876143 162.030000 161.460000
10 1623542520 10 56 2939.582925 2941.808300 2930.030000
11 1623542520 13 338 0.068159 0.068170 0.067950
12 1623542520 12 264 0.328061 0.328370 0.326973
13 1623542520 11 49 242.745000 243.000000 242.230000
Close Volume VWAP row_id
0 1.479259 4.868546e+05 1.480640 28
1 580.540000 4.723730e+02 580.850017 29
2 343.009500 9.757970e+02 343.139652 30
3 35488.287500 2.205352e+02 35480.068897 31
4 0.311006 3.416122e+06 0.311131 32
5 4.822080 4.128848e+04 4.824327 33
6 55.026050 1.231549e+04 55.095780 34
7 2365.590000 7.640805e+02 2367.128372 35
8 0.997500 1.216251e+04 0.998949 36
9 161.829143 1.265969e+03 161.737656 37
10 2933.232000 6.084640e+00 2937.153112 38
11 0.068025 2.790134e+06 0.068057 39
12 0.327497 7.424868e+05 0.327676 40
13 242.347500 5.420834e+01 242.451070 41
row_id Target
0 28 0
1 29 0
2 30 0
3 31 0
4 32 0
5 33 0
6 34 0
7 35 0
8 36 0
9 37 0
10 38 0
11 39 0
12 40 0
13 41 0
int64
timestamp Asset_ID Count Open High Low \
0 1623542580 3 1023 1.479075 1.479399 1.472600
1 1623542580 2 409 580.490000 580.690000 578.310000
2 1623542580 0 614 343.238000 343.406000 341.940000
3 1623542580 1 2901 35478.867162 35503.460134 35381.010000
4 1623542580 4 1094 0.310923 0.311400 0.310400
5 1623542580 5 465 4.819883 4.822100 4.808300
6 1623542580 7 347 54.997567 55.044200 54.916800
7 1623542580 6 2624 2365.769427 2367.500000 2359.010000
8 1623542580 8 169 0.996600 1.012900 0.975100
9 1623542580 9 431 161.862333 161.930000 161.270000
10 1623542580 10 34 2930.150250 2931.520000 2921.660000
11 1623542580 13 240 0.068015 0.068055 0.067866
12 1623542580 12 276 0.327267 0.327410 0.326130
13 1623542580 11 107 242.360000 242.550000 241.700000
Close Volume VWAP row_id
0 1.473527 3.286849e+05 1.476372 42
1 578.912000 2.045210e+02 579.470144 43
2 342.325000 1.295199e+03 342.525876 44
3 35423.490000 1.188025e+02 35438.243466 45
4 0.310676 2.403980e+06 0.310894 46
5 4.813450 3.476754e+04 4.815294 47
6 54.940950 2.735096e+03 54.974253 48
7 2360.505714 2.253663e+03 2362.394059 49
8 0.991000 6.251820e+04 0.992974 50
9 161.485000 1.156636e+03 161.595349 51
10 2925.073700 1.187095e+00 2926.427322 52
11 0.067936 2.572088e+06 0.067958 53
12 0.326406 2.571778e+05 0.326834 54
13 242.074000 1.393091e+02 242.123168 55
row_id Target
0 42 0
1 43 0
2 44 0
3 45 0
4 46 0
5 47 0
6 48 0
7 49 0
8 50 0
9 51 0
10 52 0
11 53 0
12 54 0
13 55 0
int64
```python
```