# data preprossong ###### tags: `Code` ```python import tensorflow as tf from tensorflow import keras from sklearn.preprocessing import StandardScaler tf.config.list_physical_devices() ``` ```python import pandas as pd import numpy as np from datetime import datetime ``` ```python data_folder = "./g-research-crypto-forecasting/" !ls $data_folder ``` asset_details.csv example_test.csv supplemental_train.csv detailed-api-introduction.ipynb gresearch_crypto train.csv example_sample_submission.csv submission.csv ```python crypto_df = pd.read_csv(data_folder + 'train.csv') crypto_df ``` <div> <style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style> <table border="1" class="dataframe"> <thead> <tr style="text-align: right;"> <th></th> <th>timestamp</th> <th>Asset_ID</th> <th>Count</th> <th>Open</th> <th>High</th> <th>Low</th> <th>Close</th> <th>Volume</th> <th>VWAP</th> <th>Target</th> </tr> </thead> <tbody> <tr> <th>0</th> <td>1514764860</td> <td>2</td> <td>40.0</td> <td>2376.580000</td> <td>2399.500000</td> <td>2357.140000</td> <td>2374.590000</td> <td>1.923301e+01</td> <td>2373.116392</td> <td>-0.004218</td> </tr> <tr> <th>1</th> <td>1514764860</td> <td>0</td> <td>5.0</td> <td>8.530000</td> <td>8.530000</td> <td>8.530000</td> <td>8.530000</td> <td>7.838000e+01</td> <td>8.530000</td> <td>-0.014399</td> </tr> <tr> <th>2</th> <td>1514764860</td> <td>1</td> <td>229.0</td> <td>13835.194000</td> <td>14013.800000</td> <td>13666.110000</td> <td>13850.176000</td> <td>3.155006e+01</td> <td>13827.062093</td> <td>-0.014643</td> </tr> <tr> <th>3</th> <td>1514764860</td> <td>5</td> <td>32.0</td> <td>7.659600</td> <td>7.659600</td> <td>7.656700</td> <td>7.657600</td> <td>6.626713e+03</td> <td>7.657713</td> <td>-0.013922</td> </tr> <tr> <th>4</th> <td>1514764860</td> <td>7</td> <td>5.0</td> <td>25.920000</td> <td>25.920000</td> <td>25.874000</td> <td>25.877000</td> <td>1.210873e+02</td> <td>25.891363</td> <td>-0.008264</td> </tr> <tr> <th>...</th> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> </tr> <tr> <th>24236801</th> <td>1632182400</td> <td>9</td> <td>775.0</td> <td>157.181571</td> <td>157.250000</td> <td>156.700000</td> <td>156.943857</td> <td>4.663725e+03</td> <td>156.994319</td> <td>NaN</td> </tr> <tr> <th>24236802</th> <td>1632182400</td> <td>10</td> <td>34.0</td> <td>2437.065067</td> <td>2438.000000</td> <td>2430.226900</td> <td>2432.907467</td> <td>3.975460e+00</td> <td>2434.818747</td> <td>NaN</td> </tr> <tr> <th>24236803</th> <td>1632182400</td> <td>13</td> <td>380.0</td> <td>0.091390</td> <td>0.091527</td> <td>0.091260</td> <td>0.091349</td> <td>2.193732e+06</td> <td>0.091388</td> <td>NaN</td> </tr> <tr> <th>24236804</th> <td>1632182400</td> <td>12</td> <td>177.0</td> <td>0.282168</td> <td>0.282438</td> <td>0.281842</td> <td>0.282051</td> <td>1.828508e+05</td> <td>0.282134</td> <td>NaN</td> </tr> <tr> <th>24236805</th> <td>1632182400</td> <td>11</td> <td>48.0</td> <td>232.695000</td> <td>232.800000</td> <td>232.240000</td> <td>232.275000</td> <td>1.035123e+02</td> <td>232.569697</td> <td>NaN</td> </tr> </tbody> </table> <p>24236806 rows × 10 columns</p> </div> ```python asset_details = pd.read_csv(data_folder + 'asset_details.csv').set_index("Asset_ID") asset_details ``` <div> <style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style> <table border="1" class="dataframe"> <thead> <tr style="text-align: right;"> <th></th> <th>Weight</th> <th>Asset_Name</th> </tr> <tr> <th>Asset_ID</th> <th></th> <th></th> </tr> </thead> <tbody> <tr> <th>2</th> <td>2.397895</td> <td>Bitcoin Cash</td> </tr> <tr> <th>0</th> <td>4.304065</td> <td>Binance Coin</td> </tr> <tr> <th>1</th> <td>6.779922</td> <td>Bitcoin</td> </tr> <tr> <th>5</th> <td>1.386294</td> <td>EOS.IO</td> </tr> <tr> <th>7</th> <td>2.079442</td> <td>Ethereum Classic</td> </tr> <tr> <th>6</th> <td>5.894403</td> <td>Ethereum</td> </tr> <tr> <th>9</th> <td>2.397895</td> <td>Litecoin</td> </tr> <tr> <th>11</th> <td>1.609438</td> <td>Monero</td> </tr> <tr> <th>13</th> <td>1.791759</td> <td>TRON</td> </tr> <tr> <th>12</th> <td>2.079442</td> <td>Stellar</td> </tr> <tr> <th>3</th> <td>4.406719</td> <td>Cardano</td> </tr> <tr> <th>8</th> <td>1.098612</td> <td>IOTA</td> </tr> <tr> <th>10</th> <td>1.098612</td> <td>Maker</td> </tr> <tr> <th>4</th> <td>3.555348</td> <td>Dogecoin</td> </tr> </tbody> </table> </div> ```python btc = crypto_df[crypto_df["Asset_ID"]==1].set_index("timestamp") # Asset_ID = 1 for Bitcoin btc.iloc[-20:] # Select recent data rows ``` <div> <style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style> <table border="1" class="dataframe"> <thead> <tr style="text-align: right;"> <th></th> <th>Asset_ID</th> <th>Count</th> <th>Open</th> <th>High</th> <th>Low</th> <th>Close</th> <th>Volume</th> <th>VWAP</th> <th>Target</th> </tr> <tr> <th>timestamp</th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> </tr> </thead> <tbody> <tr> <th>1632181260</th> <td>1</td> <td>2281.0</td> <td>42718.815000</td> <td>42819.380000</td> <td>42690.84</td> <td>42781.970571</td> <td>76.339988</td> <td>42755.785162</td> <td>0.003246</td> </tr> <tr> <th>1632181320</th> <td>1</td> <td>2642.0</td> <td>42772.921250</td> <td>42827.100000</td> <td>42690.75</td> <td>42755.592500</td> <td>117.429123</td> <td>42749.075916</td> <td>0.003108</td> </tr> <tr> <th>1632181380</th> <td>1</td> <td>2134.0</td> <td>42762.290000</td> <td>42811.300000</td> <td>42694.37</td> <td>42717.234286</td> <td>78.049458</td> <td>42749.024591</td> <td>0.002770</td> </tr> <tr> <th>1632181440</th> <td>1</td> <td>2165.0</td> <td>42703.802500</td> <td>42752.000000</td> <td>42636.76</td> <td>42657.202500</td> <td>86.518794</td> <td>42680.263993</td> <td>0.002726</td> </tr> <tr> <th>1632181500</th> <td>1</td> <td>2177.0</td> <td>42655.528484</td> <td>42715.990000</td> <td>42630.17</td> <td>42678.721429</td> <td>93.523647</td> <td>42679.036829</td> <td>NaN</td> </tr> <tr> <th>1632181560</th> <td>1</td> <td>2352.0</td> <td>42688.935000</td> <td>42810.260000</td> <td>42662.27</td> <td>42778.291250</td> <td>97.442472</td> <td>42739.891567</td> <td>NaN</td> </tr> <tr> <th>1632181620</th> <td>1</td> <td>2080.0</td> <td>42790.287143</td> <td>42828.000000</td> <td>42742.36</td> <td>42790.175714</td> <td>97.634234</td> <td>42794.666167</td> <td>NaN</td> </tr> <tr> <th>1632181680</th> <td>1</td> <td>2058.0</td> <td>42792.855714</td> <td>42827.700000</td> <td>42713.48</td> <td>42731.731429</td> <td>78.468814</td> <td>42786.068940</td> <td>NaN</td> </tr> <tr> <th>1632181740</th> <td>1</td> <td>3717.0</td> <td>42745.153750</td> <td>42937.310000</td> <td>42713.48</td> <td>42898.565000</td> <td>116.567006</td> <td>42828.789861</td> <td>NaN</td> </tr> <tr> <th>1632181800</th> <td>1</td> <td>3253.0</td> <td>42896.174286</td> <td>42966.980000</td> <td>42852.07</td> <td>42939.645714</td> <td>113.220529</td> <td>42910.257438</td> <td>NaN</td> </tr> <tr> <th>1632181860</th> <td>1</td> <td>2264.0</td> <td>42937.784286</td> <td>42971.950000</td> <td>42899.10</td> <td>42927.167143</td> <td>56.655507</td> <td>42932.332787</td> <td>NaN</td> </tr> <tr> <th>1632181920</th> <td>1</td> <td>2824.0</td> <td>42933.254907</td> <td>43014.980000</td> <td>42907.18</td> <td>42979.833750</td> <td>97.404745</td> <td>42963.462903</td> <td>NaN</td> </tr> <tr> <th>1632181980</th> <td>1</td> <td>1697.0</td> <td>42974.108693</td> <td>43005.650000</td> <td>42929.74</td> <td>42960.601429</td> <td>58.419459</td> <td>42963.080444</td> <td>NaN</td> </tr> <tr> <th>1632182040</th> <td>1</td> <td>1439.0</td> <td>42971.347143</td> <td>43008.850000</td> <td>42953.10</td> <td>42985.482857</td> <td>39.329753</td> <td>42981.323152</td> <td>NaN</td> </tr> <tr> <th>1632182100</th> <td>1</td> <td>1608.0</td> <td>42982.915714</td> <td>43006.540000</td> <td>42958.06</td> <td>42985.214824</td> <td>47.701279</td> <td>42983.752149</td> <td>NaN</td> </tr> <tr> <th>1632182160</th> <td>1</td> <td>1940.0</td> <td>42983.780000</td> <td>43001.850849</td> <td>42878.26</td> <td>42899.012857</td> <td>56.850913</td> <td>42935.489499</td> <td>NaN</td> </tr> <tr> <th>1632182220</th> <td>1</td> <td>2026.0</td> <td>42904.197143</td> <td>42932.000000</td> <td>42840.16</td> <td>42860.005714</td> <td>80.993326</td> <td>42879.576084</td> <td>NaN</td> </tr> <tr> <th>1632182280</th> <td>1</td> <td>1986.0</td> <td>42859.385714</td> <td>42887.500000</td> <td>42797.20</td> <td>42827.020000</td> <td>65.677734</td> <td>42844.090693</td> <td>NaN</td> </tr> <tr> <th>1632182340</th> <td>1</td> <td>4047.0</td> <td>42839.012802</td> <td>43042.160000</td> <td>42818.10</td> <td>43017.277143</td> <td>138.335477</td> <td>42935.761938</td> <td>NaN</td> </tr> <tr> <th>1632182400</th> <td>1</td> <td>2698.0</td> <td>43009.961250</td> <td>43048.510000</td> <td>42961.64</td> <td>43002.505000</td> <td>128.206820</td> <td>43011.414052</td> <td>NaN</td> </tr> </tbody> </table> </div> ```python eth = crypto_df[crypto_df["Asset_ID"]==6].set_index("timestamp") # Asset_ID = 6 for Ethereum btc_mini = btc.iloc[-200:] # Select recent data rows btc_mini ``` <div> <style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style> <table border="1" class="dataframe"> <thead> <tr style="text-align: right;"> <th></th> <th>Asset_ID</th> <th>Count</th> <th>Open</th> <th>High</th> <th>Low</th> <th>Close</th> <th>Volume</th> <th>VWAP</th> <th>Target</th> </tr> <tr> <th>timestamp</th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> </tr> </thead> <tbody> <tr> <th>1632170460</th> <td>1</td> <td>5694.0</td> <td>43558.393750</td> <td>43588.680000</td> <td>43427.28</td> <td>43447.003750</td> <td>191.442881</td> <td>43493.981877</td> <td>-0.000805</td> </tr> <tr> <th>1632170520</th> <td>1</td> <td>2528.0</td> <td>43449.818571</td> <td>43495.954028</td> <td>43414.33</td> <td>43440.000000</td> <td>80.692018</td> <td>43455.988146</td> <td>0.000418</td> </tr> <tr> <th>1632170580</th> <td>1</td> <td>3985.0</td> <td>43427.082310</td> <td>43455.218478</td> <td>43327.29</td> <td>43360.723750</td> <td>117.332513</td> <td>43391.956795</td> <td>0.000125</td> </tr> <tr> <th>1632170640</th> <td>1</td> <td>2185.0</td> <td>43362.572857</td> <td>43454.990000</td> <td>43348.06</td> <td>43427.242857</td> <td>76.361478</td> <td>43403.880056</td> <td>0.000956</td> </tr> <tr> <th>1632170700</th> <td>1</td> <td>2623.0</td> <td>43419.832500</td> <td>43467.000000</td> <td>43351.15</td> <td>43375.040000</td> <td>76.018320</td> <td>43408.267118</td> <td>-0.000430</td> </tr> <tr> <th>...</th> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> <td>...</td> </tr> <tr> <th>1632182160</th> <td>1</td> <td>1940.0</td> <td>42983.780000</td> <td>43001.850849</td> <td>42878.26</td> <td>42899.012857</td> <td>56.850913</td> <td>42935.489499</td> <td>NaN</td> </tr> <tr> <th>1632182220</th> <td>1</td> <td>2026.0</td> <td>42904.197143</td> <td>42932.000000</td> <td>42840.16</td> <td>42860.005714</td> <td>80.993326</td> <td>42879.576084</td> <td>NaN</td> </tr> <tr> <th>1632182280</th> <td>1</td> <td>1986.0</td> <td>42859.385714</td> <td>42887.500000</td> <td>42797.20</td> <td>42827.020000</td> <td>65.677734</td> <td>42844.090693</td> <td>NaN</td> </tr> <tr> <th>1632182340</th> <td>1</td> <td>4047.0</td> <td>42839.012802</td> <td>43042.160000</td> <td>42818.10</td> <td>43017.277143</td> <td>138.335477</td> <td>42935.761938</td> <td>NaN</td> </tr> <tr> <th>1632182400</th> <td>1</td> <td>2698.0</td> <td>43009.961250</td> <td>43048.510000</td> <td>42961.64</td> <td>43002.505000</td> <td>128.206820</td> <td>43011.414052</td> <td>NaN</td> </tr> </tbody> </table> <p>200 rows × 9 columns</p> </div> ```python eth = crypto_df[crypto_df["Asset_ID"]==6].set_index("timestamp") # Asset_ID = 6 for Ethereum eth.info(show_counts =True) ``` <class 'pandas.core.frame.DataFrame'> Int64Index: 1956200 entries, 1514764860 to 1632182400 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Asset_ID 1956200 non-null int64 1 Count 1956200 non-null float64 2 Open 1956200 non-null float64 3 High 1956200 non-null float64 4 Low 1956200 non-null float64 5 Close 1956200 non-null float64 6 Volume 1956200 non-null float64 7 VWAP 1956200 non-null float64 8 Target 1955860 non-null float64 dtypes: float64(8), int64(1) memory usage: 149.2 MB ```python ``` ```python beg_btc = btc.index[0].astype('datetime64[s]') end_btc = btc.index[-1].astype('datetime64[s]') beg_eth = eth.index[0].astype('datetime64[s]') end_eth = eth.index[-1].astype('datetime64[s]') print('BTC data goes from ', beg_btc, 'to ', end_btc) print('Ethereum data goes from ', beg_eth, 'to ', end_eth) ``` BTC data goes from 2018-01-01T00:01:00 to 2021-09-21T00:00:00 Ethereum data goes from 2018-01-01T00:01:00 to 2021-09-21T00:00:00 ```python crypto_coins = {} for coin_id in range(14): coin = crypto_df[crypto_df["Asset_ID"]==coin_id].set_index("timestamp") coin = coin.reindex(range(coin.index[0],coin.index[-1]+60,60),method='pad') crypto_coins[coin_id] = coin[-100000:] ``` ```python ``` ```python for coin_id in range(14): coin = crypto_coins[coin_id] count =(coin.index[1:]-coin.index[:-1]).value_counts(sort=True) print("\n",coin_id) print(asset_details.loc[coin_id,"Asset_Name"]) print(count) ``` 0 Binance Coin 60 99999 Name: timestamp, dtype: int64 1 Bitcoin 60 99999 Name: timestamp, dtype: int64 2 Bitcoin Cash 60 99999 Name: timestamp, dtype: int64 3 Cardano 60 99999 Name: timestamp, dtype: int64 4 Dogecoin 60 99999 Name: timestamp, dtype: int64 5 EOS.IO 60 99999 Name: timestamp, dtype: int64 6 Ethereum 60 99999 Name: timestamp, dtype: int64 7 Ethereum Classic 60 99999 Name: timestamp, dtype: int64 8 IOTA 60 99999 Name: timestamp, dtype: int64 9 Litecoin 60 99999 Name: timestamp, dtype: int64 10 Maker 60 99999 Name: timestamp, dtype: int64 11 Monero 60 99999 Name: timestamp, dtype: int64 12 Stellar 60 99999 Name: timestamp, dtype: int64 13 TRON 60 99999 Name: timestamp, dtype: int64 ```python for coin_id in range(14): coin = crypto_coins[coin_id] count = coin.isna().sum() print("\n",coin_id,asset_details.loc[coin_id,"Asset_Name"]) print(count) ``` 0 Binance Coin Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 44 dtype: int64 1 Bitcoin Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 16 dtype: int64 2 Bitcoin Cash Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 16 dtype: int64 3 Cardano Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 16 dtype: int64 4 Dogecoin Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 16 dtype: int64 5 EOS.IO Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 16 dtype: int64 6 Ethereum Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 16 dtype: int64 7 Ethereum Classic Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 16 dtype: int64 8 IOTA Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 344 dtype: int64 9 Litecoin Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 16 dtype: int64 10 Maker Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 834 dtype: int64 11 Monero Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 107 dtype: int64 12 Stellar Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 16 dtype: int64 13 TRON Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 29 dtype: int64 ```python for coin_id in range(14): crypto_coins[coin_id] = crypto_coins[coin_id].fillna(method="ffill") coin = crypto_coins[coin_id] count = coin.isna().sum() print("\n",coin_id,asset_details.loc[coin_id,"Asset_Name"]) print(count) ``` 0 Binance Coin Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 1 Bitcoin Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 2 Bitcoin Cash Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 3 Cardano Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 4 Dogecoin Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 5 EOS.IO Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 6 Ethereum Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 7 Ethereum Classic Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 8 IOTA Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 9 Litecoin Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 10 Maker Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 11 Monero Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 12 Stellar Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 13 TRON Asset_ID 0 Count 0 Open 0 High 0 Low 0 Close 0 Volume 0 VWAP 0 Target 0 dtype: int64 ```python crypto_coins[0].loc[1632181500:].shape ``` (16, 9) ```python def series_split(x,n_steps): batch_size = x.shape[0]-n_steps print(x.shape) dim = x.shape[1] x_train = np.zeros((batch_size,n_steps,dim)) y_train = np.zeros((batch_size,n_steps,dim)) for j in range(batch_size): x_train[j] = x[j:j+n_steps,:] y_train[j] = x[j+1:j+n_steps+1,:] return x_train,y_train data = crypto_coins[0] scaler = StandardScaler().fit(data) data = scaler.transform(data) x_train,y_train = series_split(data,60) x_train.shape ``` (100000, 9) (99940, 60, 9) ```python dim = x_train.shape[2] ``` ```python class LNSimpleRNNCell(keras.layers.Layer): def __init__(self, units, activation="tanh", **kwargs): super().__init__(**kwargs) self.state_size = units self.output_size = units self.simple_rnn_cell = keras.layers.SimpleRNNCell(units,activation=None) self.layer_norm = LayerNormalization() self.activation = keras.activations.get(activation) def get_initial_state(self, inputs=None, batch_size=None, dtype=None): if inputs is not None: batch_size = tf.shape(inputs)[0] dtype = inputs.dtype return [tf.zeros([batch_size, self.state_size], dtype=dtype)] def call(self, inputs, states): outputs, new_states = self.simple_rnn_cell(inputs, states) norm_outputs = self.activation(self.layer_norm(outputs)) return norm_outputs, [norm_outputs] model = keras.models.Sequential([ keras.layers.LSTM(256, return_sequences=True, input_shape=[None, dim]), keras.layers.Dropout(0.2), keras.layers.LSTM(256, return_sequences=True), keras.layers.Dropout(0.2), keras.layers.TimeDistributed(keras.layers.Dense(dim)) ]) epoch = 10000 callback = tf.keras.callbacks.EarlyStopping( monitor='val_loss', patience=4, mode='auto', restore_best_weights=True ) opt = tf.keras.optimizers.Adam( learning_rate=0.01 ) model.compile(loss="mse", optimizer=opt) history = model.fit(x_train, y_train, validation_split=0.2, epochs=epoch, callbacks = [callback]) ``` 2022-01-07 13:17:07.346509: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA 2022-01-07 13:17:07.367970: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 1699915000 Hz 2022-01-07 13:17:07.368581: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x44e2690 initialized for platform Host (this does not guarantee that XLA will be used). Devices: 2022-01-07 13:17:07.368609: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version 2022-01-07 13:17:07.532610: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x3b8f330 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: 2022-01-07 13:17:07.532658: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): GeForce GTX 1080, Compute Capability 6.1 2022-01-07 13:17:07.536987: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:04:00.0 name: GeForce GTX 1080 computeCapability: 6.1 coreClock: 1.7335GHz coreCount: 20 deviceMemorySize: 7.93GiB deviceMemoryBandwidth: 298.32GiB/s 2022-01-07 13:17:07.537138: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2022-01-07 13:17:07.537170: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2022-01-07 13:17:07.537275: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2022-01-07 13:17:07.537317: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2022-01-07 13:17:07.537356: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2022-01-07 13:17:07.537393: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2022-01-07 13:17:07.537417: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2022-01-07 13:17:07.541092: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0 2022-01-07 13:17:07.541163: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2022-01-07 13:17:07.869887: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2022-01-07 13:17:07.869954: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0 2022-01-07 13:17:07.869968: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N 2022-01-07 13:17:07.873333: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 7568 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080, pci bus id: 0000:04:00.0, compute capability: 6.1) Train on 79952 samples, validate on 19988 samples Epoch 1/10000 2022-01-07 13:17:13.516412: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2022-01-07 13:17:14.141337: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 79952/79952 [==============================] - 43s 533us/sample - loss: 0.1120 - val_loss: 0.1723 Epoch 2/10000 79952/79952 [==============================] - 36s 449us/sample - loss: 0.0724 - val_loss: 0.1773 Epoch 3/10000 79952/79952 [==============================] - 37s 460us/sample - loss: 0.0608 - val_loss: 0.1799 Epoch 4/10000 79952/79952 [==============================] - 36s 450us/sample - loss: 0.0546 - val_loss: 0.1807 Epoch 5/10000 79952/79952 [==============================] - 37s 463us/sample - loss: 0.1299 - val_loss: 0.2197 ```python class LNSimpleRNNCell(keras.layers.Layer): def __init__(self, units, activation="tanh", **kwargs): super().__init__(**kwargs) self.state_size = units self.output_size = units self.simple_rnn_cell = keras.layers.SimpleRNNCell(units,activation=None) self.layer_norm = LayerNormalization() self.activation = keras.activations.get(activation) def get_initial_state(self, inputs=None, batch_size=None, dtype=None): if inputs is not None: batch_size = tf.shape(inputs)[0] dtype = inputs.dtype return [tf.zeros([batch_size, self.state_size], dtype=dtype)] def call(self, inputs, states): outputs, new_states = self.simple_rnn_cell(inputs, states) norm_outputs = self.activation(self.layer_norm(outputs)) return norm_outputs, [norm_outputs] model = keras.models.Sequential([ keras.layers.LSTM(256, return_sequences=True, input_shape=[None, dim]), keras.layers.Dropout(0.2), keras.layers.LSTM(256, return_sequences=True), keras.layers.Dropout(0.2), keras.layers.TimeDistributed(keras.layers.Dense(dim)) ]) epoch = 10000 callback = tf.keras.callbacks.EarlyStopping( monitor='val_loss', patience=4, mode='auto', restore_best_weights=True ) opt = tf.keras.optimizers.Adam( learning_rate=0.01 ) model.compile(loss="mse", optimizer=opt) history = model.fit(x_train, y_train, validation_split=0.2, epochs=epoch, callbacks = [callback]) ``` 2022-01-07 11:07:23.270573: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 57594240 exceeds 10% of system memory. 2022-01-07 11:07:23.363853: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 57594240 exceeds 10% of system memory. Train on 79992 samples, validate on 19998 samples Epoch 1/10000 79992/79992 [==============================] - 30s 369us/sample - loss: 0.1469 - val_loss: 0.1676 Epoch 2/10000 79992/79992 [==============================] - 25s 314us/sample - loss: 0.1339 - val_loss: 0.1737 Epoch 3/10000 79992/79992 [==============================] - 25s 319us/sample - loss: 0.1291 - val_loss: 0.1768 Epoch 4/10000 79992/79992 [==============================] - 25s 318us/sample - loss: 0.1231 - val_loss: 0.1768 Epoch 5/10000 79992/79992 [==============================] - 25s 316us/sample - loss: 0.1190 - val_loss: 0.1748 ```python import gresearch_crypto env = gresearch_crypto.make_env() iter_test = env.iter_test() ``` ```python for (test_df, sample_prediction_df) in iter_test: print(test_df) sample_prediction_df['Target'] = 0 print(sample_prediction_df) print(sample_prediction_df['Target'].dtypes) env.predict(sample_prediction_df) ``` This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set. timestamp Asset_ID Count Open High Low \ 0 1623542400 3 1201 1.478556 1.486030 1.478000 1 1623542400 2 1020 580.306667 583.890000 579.910000 2 1623542400 0 626 343.789500 345.108000 343.640000 3 1623542400 1 2888 35554.289632 35652.464650 35502.670000 4 1623542400 4 433 0.312167 0.312600 0.311920 5 1623542400 5 359 4.832550 4.845900 4.822900 6 1623542400 7 541 55.223080 55.494000 55.182000 7 1623542400 6 2186 2371.194286 2379.200000 2369.670000 8 1623542400 8 35 1.003150 1.019800 0.987300 9 1623542400 9 560 161.933429 162.480000 161.730000 10 1623542400 10 61 2939.862750 2952.160000 2936.230000 11 1623542400 13 229 0.068132 0.068240 0.068038 12 1623542400 12 383 0.327973 0.329272 0.327650 13 1623542400 11 123 243.137500 243.810000 242.960000 Close Volume VWAP row_id 0 1.483681 6.547996e+05 1.481439 0 1 582.276667 1.227988e+03 581.697038 1 2 344.598000 1.718833e+03 344.441729 2 3 35602.004286 1.638115e+02 35583.469303 3 4 0.312208 5.855774e+05 0.312154 4 5 4.837583 4.714355e+04 4.836607 5 6 55.344680 6.625202e+03 55.298816 6 7 2374.380714 1.214129e+03 2374.335307 7 8 1.003300 7.061928e+03 1.002936 8 9 162.214714 1.485009e+03 162.231310 9 10 2947.078025 9.584785e+00 2945.110614 10 11 0.068158 3.046438e+06 0.068158 11 12 0.328829 5.364911e+05 0.328582 12 13 243.532500 3.079589e+02 243.452697 13 row_id Target 0 0 0 1 1 0 2 2 0 3 3 0 4 4 0 5 5 0 6 6 0 7 7 0 8 8 0 9 9 0 10 10 0 11 11 0 12 12 0 13 13 0 int64 timestamp Asset_ID Count Open High Low \ 0 1623542460 3 672 1.482410 1.483759 1.47920 1 1623542460 2 1251 581.800000 585.590000 580.38000 2 1623542460 0 458 344.353500 344.790000 343.62000 3 1623542460 1 2006 35596.771429 35621.000000 35533.38000 4 1623542460 4 573 0.312274 0.312400 0.31154 5 1623542460 5 644 4.834550 4.849500 4.81850 6 1623542460 7 530 55.320900 55.426600 55.14900 7 1623542460 6 1261 2373.970101 2375.350000 2369.37000 8 1623542460 8 98 1.003050 1.019600 0.98410 9 1623542460 9 381 162.141857 162.380000 161.80000 10 1623542460 10 35 2946.370567 2949.720000 2943.71000 11 1623542460 13 395 0.068144 0.068334 0.06804 12 1623542460 12 287 0.328656 0.329280 0.32778 13 1623542460 11 62 243.368000 243.500000 242.94000 Close Volume VWAP row_id 0 1.482043 2.858286e+05 1.481495 14 1 582.358333 1.405285e+03 583.451389 15 2 344.089500 1.217352e+03 344.188716 16 3 35555.397143 9.336366e+01 35584.861196 17 4 0.311847 9.396433e+05 0.311930 18 5 4.838300 8.678307e+04 4.837430 19 6 55.286400 6.929458e+03 55.265491 20 7 2371.790000 7.867385e+02 2372.809830 21 8 1.001550 4.771951e+04 1.001078 22 9 162.193000 1.344810e+03 162.121310 23 10 2945.711900 2.794134e+00 2946.633163 24 11 0.068217 4.981365e+06 0.068201 25 12 0.328563 4.496390e+05 0.328548 26 13 243.330000 1.435053e+02 243.337398 27 row_id Target 0 14 0 1 15 0 2 16 0 3 17 0 4 18 0 5 19 0 6 20 0 7 21 0 8 22 0 9 23 0 10 24 0 11 25 0 12 26 0 13 27 0 int64 timestamp Asset_ID Count Open High Low \ 0 1623542520 3 849 1.481493 1.482896 1.477801 1 1623542520 2 540 582.100000 582.670000 579.620000 2 1623542520 0 535 343.778000 344.060000 342.500000 3 1623542520 1 3531 35550.271250 35576.590000 35402.870000 4 1623542520 4 1667 0.311670 0.312000 0.310430 5 1623542520 5 380 4.833440 4.835700 4.814300 6 1623542520 7 825 55.225700 55.309600 54.910000 7 1623542520 6 1856 2370.880011 2371.950076 2363.000000 8 1623542520 8 53 1.000450 1.016400 0.981700 9 1623542520 9 439 161.876143 162.030000 161.460000 10 1623542520 10 56 2939.582925 2941.808300 2930.030000 11 1623542520 13 338 0.068159 0.068170 0.067950 12 1623542520 12 264 0.328061 0.328370 0.326973 13 1623542520 11 49 242.745000 243.000000 242.230000 Close Volume VWAP row_id 0 1.479259 4.868546e+05 1.480640 28 1 580.540000 4.723730e+02 580.850017 29 2 343.009500 9.757970e+02 343.139652 30 3 35488.287500 2.205352e+02 35480.068897 31 4 0.311006 3.416122e+06 0.311131 32 5 4.822080 4.128848e+04 4.824327 33 6 55.026050 1.231549e+04 55.095780 34 7 2365.590000 7.640805e+02 2367.128372 35 8 0.997500 1.216251e+04 0.998949 36 9 161.829143 1.265969e+03 161.737656 37 10 2933.232000 6.084640e+00 2937.153112 38 11 0.068025 2.790134e+06 0.068057 39 12 0.327497 7.424868e+05 0.327676 40 13 242.347500 5.420834e+01 242.451070 41 row_id Target 0 28 0 1 29 0 2 30 0 3 31 0 4 32 0 5 33 0 6 34 0 7 35 0 8 36 0 9 37 0 10 38 0 11 39 0 12 40 0 13 41 0 int64 timestamp Asset_ID Count Open High Low \ 0 1623542580 3 1023 1.479075 1.479399 1.472600 1 1623542580 2 409 580.490000 580.690000 578.310000 2 1623542580 0 614 343.238000 343.406000 341.940000 3 1623542580 1 2901 35478.867162 35503.460134 35381.010000 4 1623542580 4 1094 0.310923 0.311400 0.310400 5 1623542580 5 465 4.819883 4.822100 4.808300 6 1623542580 7 347 54.997567 55.044200 54.916800 7 1623542580 6 2624 2365.769427 2367.500000 2359.010000 8 1623542580 8 169 0.996600 1.012900 0.975100 9 1623542580 9 431 161.862333 161.930000 161.270000 10 1623542580 10 34 2930.150250 2931.520000 2921.660000 11 1623542580 13 240 0.068015 0.068055 0.067866 12 1623542580 12 276 0.327267 0.327410 0.326130 13 1623542580 11 107 242.360000 242.550000 241.700000 Close Volume VWAP row_id 0 1.473527 3.286849e+05 1.476372 42 1 578.912000 2.045210e+02 579.470144 43 2 342.325000 1.295199e+03 342.525876 44 3 35423.490000 1.188025e+02 35438.243466 45 4 0.310676 2.403980e+06 0.310894 46 5 4.813450 3.476754e+04 4.815294 47 6 54.940950 2.735096e+03 54.974253 48 7 2360.505714 2.253663e+03 2362.394059 49 8 0.991000 6.251820e+04 0.992974 50 9 161.485000 1.156636e+03 161.595349 51 10 2925.073700 1.187095e+00 2926.427322 52 11 0.067936 2.572088e+06 0.067958 53 12 0.326406 2.571778e+05 0.326834 54 13 242.074000 1.393091e+02 242.123168 55 row_id Target 0 42 0 1 43 0 2 44 0 3 45 0 4 46 0 5 47 0 6 48 0 7 49 0 8 50 0 9 51 0 10 52 0 11 53 0 12 54 0 13 55 0 int64 ```python ```