# 預測2330漲跌(lstm)並做出策略再進行回測
###### tags: `tensorflow.keras`
## 抓資料
```python=
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
from bs4 import BeautifulSoup
import requests
import pandas
# Chrome options: turn off site notification prompts.
options = Options()
options.add_argument("--disable-notifications")

# Open Chrome using the driver binary fetched by webdriver_manager. The options
# object must be handed to the constructor, otherwise it has no effect.
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
driver.get("https://hk.finance.yahoo.com/quote/2330.TW/history?period1=1332547200&period2=1648080000&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true")

# Scroll to the bottom of the page 50 times so the page loads more rows.
for _ in range(50):
    height = driver.execute_script("return document.documentElement.scrollHeight")
    driver.execute_script("window.scrollTo(0, " + str(height) + ");")
    time.sleep(0.1)  # short pause between scrolls
```
```python=
# Parse the scrolled page and let pandas pull the HTML tables out of it.
page_html = driver.page_source
soup = BeautifulSoup(page_html)
a = pandas.read_html(soup.prettify())  # list of DataFrames; a[0] is the first table found
a[0]  # bare expression: shown as the cell output when run in a notebook
```

```python=
# Per-column lists filled from the scraped table a[0].
# ('hight' is spelled this way because later cells refer to it by that name.)
opening = []
hight = []
low = []
close = []
volume = []
date = []

table = a[0]
rows = zip(
    table['開市'], table['最高'], table['最低'],
    table['收市*'], table['成交量'], table['日期'],
)
for open_cell, high_cell, low_cell, close_cell, volume_cell, date_cell in rows:
    # Skip rows that are not plain numeric quotes: the open cell contains
    # '股' or '價', or the volume cell contains '-'.
    # (Presumably dividend notices, footer text and no-volume days - confirm
    # against the live page.)
    if '股' in open_cell or '-' in volume_cell or '價' in open_cell:
        continue
    opening.append(float(open_cell))
    hight.append(float(high_cell))
    low.append(float(low_cell))
    close.append(float(close_cell))
    volume.append(float(volume_cell))
    date.append(date_cell)

# Flip every list so the row order of the scraped table is reversed
# (presumably the page lists newest first, making index 0 the oldest day).
for column in (date, opening, hight, low, close, volume):
    column.reverse()
```
```python=
import re
# Rewrite each scraped date label into a dash-separated string:
# '年' and '月' become '-', and the trailing '日' is dropped so no dangling
# dash is left behind (assumes labels look like "2022年3月24日").
# Plain str.replace is enough for literal characters, and the scraped table
# stored in `a` is left untouched.
dat = [
    label.replace('年', '-').replace('月', '-').replace('日', '')
    for label in date
]
```
```python=
# Combine the per-column lists into one frame, one row per trading day.
full = pandas.DataFrame(
    list(zip(opening, hight, low, close, volume)),
    columns=['open', 'hight', 'low', 'close', 'volume'],
)

# Label column: 1 when this row's close is greater than the NEXT row's open,
# otherwise 0. The last row has no next open, so it compares against NaN and
# stays 0.
# NOTE(review): the tutorial describes 1 as "rise" and 0 as "fall", but
# close > next open means the next session opens lower - confirm the intended
# direction of this comparison.
next_open = full['open'].shift(-1)
full['updown'] = (full['close'] > next_open).astype(int)
```

## 正規化
```python=
from sklearn.preprocessing import MinMaxScaler

# Both frames are every row of `full` except the first.
# NOTE(review): train and test are the same rows, so nothing is held out;
# any score computed on `test` is an in-sample score.
train = full[1:]
test = full[1:]

# Fit the min-max scaler once, on the training frame (all six columns,
# including 'updown'), and reuse the fitted ranges for the test frame instead
# of re-fitting on it.
scaler = MinMaxScaler()
train = pandas.DataFrame(scaler.fit_transform(train), columns=full.columns)
test = pandas.DataFrame(scaler.transform(test), columns=full.columns)
```
## 抓取用來預測的資料 及 預測值
```python=
import tqdm
n = 30  # window length: each sample is n consecutive rows of features

# Every column except the label is a model input.
feature_names = list(train.drop('updown', axis=1).columns)
norm_data_x = train[feature_names]
labels = train['updown']

X = []        # one (n, len(feature_names)) array per window
y = []        # label taken from the last row of each window
indexes = []  # index label of that last row

for start in tqdm.tqdm_notebook(range(0, len(train) - n)):
    last = start + n - 1
    # iloc[start:start + n] selects the n rows of this window.
    X.append(norm_data_x.iloc[start:start + n].values)
    y.append(labels.iloc[last])
    indexes.append(train.index[last])
```
## 建模 lstm(多用於預測)
```python=
import keras
from keras.models import Sequential
from keras.layers import Dense,LSTM
import numpy
n_steps = 30    # rows (days) of history in each input window
n_features = 5  # feature columns per row

# Network: one LSTM layer followed by a single-unit dense output.
# LSTM arguments: number of units, activation, whether to return the whole
# sequence (False = only the final step), and the input shape
# (steps, features).
# NOTE(review): the targets are 0/1 but the output unit is linear and trained
# with MSE, and MAPE divides by the target so it blows up on 0 labels -
# consider a sigmoid output with binary cross-entropy and an accuracy metric.
model = Sequential([
    LSTM(
        50,
        activation='relu',
        return_sequences=False,
        input_shape=(n_steps, n_features),
    ),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse', metrics=['mse', 'mape'])
```
## 訓練
```python=
# Stack the window and label lists into arrays, then train for 30 epochs in
# batches of 100 samples. `history` keeps the object returned by fit().
train_x = numpy.array(X)
train_y = numpy.array(y)
history = model.fit(train_x, train_y, batch_size=100, epochs=30)
```
```
Epoch 1/30
24/24 [==============================] - 1s 12ms/step - loss: 0.3778 - mse: 0.3778 - mape: 93689392.0000
Epoch 2/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2747 - mse: 0.2747 - mape: 197704432.0000
Epoch 3/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2490 - mse: 0.2490 - mape: 248887376.0000
Epoch 4/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2493 - mse: 0.2493 - mape: 240265088.0000
Epoch 5/30
24/24 [==============================] - 0s 13ms/step - loss: 0.2476 - mse: 0.2476 - mape: 236481664.0000
Epoch 6/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2481 - mse: 0.2481 - mape: 244062752.0000
Epoch 7/30
24/24 [==============================] - 0s 13ms/step - loss: 0.2475 - mse: 0.2475 - mape: 244211888.0000
Epoch 8/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2469 - mse: 0.2469 - mape: 244918192.0000
Epoch 9/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2475 - mse: 0.2475 - mape: 243948992.0000
Epoch 10/30
24/24 [==============================] - 0s 13ms/step - loss: 0.2475 - mse: 0.2475 - mape: 249503232.0000
Epoch 11/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2476 - mse: 0.2476 - mape: 237542128.0000
Epoch 12/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2470 - mse: 0.2470 - mape: 245051488.0000
Epoch 13/30
24/24 [==============================] - 0s 13ms/step - loss: 0.2469 - mse: 0.2469 - mape: 244758416.0000
Epoch 14/30
24/24 [==============================] - 0s 13ms/step - loss: 0.2469 - mse: 0.2469 - mape: 247838048.0000
Epoch 15/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2473 - mse: 0.2473 - mape: 243551968.0000
Epoch 16/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2468 - mse: 0.2468 - mape: 242248672.0000
Epoch 17/30
24/24 [==============================] - 0s 13ms/step - loss: 0.2466 - mse: 0.2466 - mape: 241368496.0000
Epoch 18/30
24/24 [==============================] - 0s 13ms/step - loss: 0.2466 - mse: 0.2466 - mape: 243799472.0000
Epoch 19/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2464 - mse: 0.2464 - mape: 248815344.0000
Epoch 20/30
24/24 [==============================] - 0s 13ms/step - loss: 0.2470 - mse: 0.2470 - mape: 248027072.0000
Epoch 21/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2465 - mse: 0.2465 - mape: 243964112.0000
Epoch 22/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2474 - mse: 0.2474 - mape: 245234512.0000
Epoch 23/30
24/24 [==============================] - 0s 13ms/step - loss: 0.2464 - mse: 0.2464 - mape: 245181232.0000
Epoch 24/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2463 - mse: 0.2463 - mape: 249691536.0000
Epoch 25/30
24/24 [==============================] - 0s 13ms/step - loss: 0.2459 - mse: 0.2459 - mape: 248387216.0000
Epoch 26/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2470 - mse: 0.2470 - mape: 242287008.0000
Epoch 27/30
24/24 [==============================] - 0s 13ms/step - loss: 0.2461 - mse: 0.2461 - mape: 242720112.0000
Epoch 28/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2460 - mse: 0.2460 - mape: 247012384.0000
Epoch 29/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2458 - mse: 0.2458 - mape: 247025296.0000
Epoch 30/30
24/24 [==============================] - 0s 12ms/step - loss: 0.2458 - mse: 0.2458 - mape: 239765280.0000
```
## 預測
```python=
# Run the model on the same windows X that were used for training.
raw_output = model.predict(numpy.array(X))
predictions = pandas.DataFrame(raw_output).rename(columns={0: '預測值'})  # predicted value
predictions

# Put the labels next to the predictions.
# NOTE(review): despite its name, y_test holds the training labels `y`.
y_test = pandas.DataFrame({"實際值": numpy.array(y)})  # actual value
final = pandas.concat([predictions, y_test], axis=1)

# Absolute error of each prediction.
final["mae"] = (final["預測值"] - final["實際值"]).abs()
final
```

```python=
# Turn the continuous model output into a class: 1 when it is >= 0.5,
# otherwise 0. Assigning the whole column at once avoids chained indexing
# (final[col][i] = ...), which pandas may apply to a temporary copy.
final['預測值'] = (final['預測值'] >= 0.5).astype(float)

# Share of rows whose thresholded prediction equals the label.
hits = int((final['預測值'] == final['實際值']).sum())
print(hits / len(final['預測值']))
```
輸出結果(以訓練資料計算的準確率):
0.6059050064184852
# 回測
當預測為漲則買進,預測為跌則賣出
## 整理回測資料
```python=
# Price frame for the backtest, indexed by date, with capitalised
# Open/High/Low/Close/Volume columns (the strategy reads them as
# self.data.Open, self.data.Close, ...).
ohlcv = {
    'Date': dat,
    'Volume': volume,
    'Open': opening,
    'High': hight,
    'Low': low,
    'Close': close,
}
df = pandas.DataFrame(ohlcv).set_index('Date')

# Discard the first 100 rows and the final row.
df = df.drop(df.index[:100])
df = df.drop(df.index[-1:])

# Convert the string dates to a DatetimeIndex.
df.index = pandas.to_datetime(df.index)
```
## 交易策略
```python=
from backtesting import Backtest, Strategy
from backtesting.test import SMA #從test子模組引入繪製均線功能
class SmaCross(Strategy):
    """Trade on the LSTM output: buy when it is above 0.5, otherwise sell.

    The class is named ``SmaCross`` because that is the name handed to
    ``Backtest``; giving it its own name also stops it from shadowing the
    imported ``Strategy`` base class.

    Relies on module-level ``model`` (the trained network), ``scaler`` (the
    fitted MinMaxScaler) and ``numpy``.
    """

    window = 30  # bars per model input; must match the n_steps the model was built with

    def init(self):
        # Handles kept for reference only. In backtesting.py the arrays seen in
        # init() span the whole data set, so next() must not read them -
        # doing so would feed future bars to the model.
        self.dataclose = self.data.Close
        self.dataopen = self.data.Open
        self.datahigh = self.data.High
        self.datalow = self.data.Low
        self.datavolume = self.data.Volume

    def next(self):
        data = self.data  # inside next() this only reaches up to the current bar
        if len(data.Close) < self.window:
            return  # not enough history for a full window yet

        # Most recent `window` bars, oldest first, in the training column
        # order: open, high, low, close, volume.
        features = numpy.column_stack([
            data.Open[-self.window:],
            data.High[-self.window:],
            data.Low[-self.window:],
            data.Close[-self.window:],
            data.Volume[-self.window:],
        ]).astype(float)

        # Apply the already-fitted min-max ranges (x * scale_ + min_) instead
        # of re-fitting the shared scaler on every 30-bar window.
        # Assumes the scaler was fitted on a frame whose first five columns
        # are open, high, low, close, volume in that order - confirm if the
        # training frame changes.
        n_cols = features.shape[1]
        scaled = features * scaler.scale_[:n_cols] + scaler.min_[:n_cols]

        # Model input shape: (1 sample, window, n_cols).
        output = model.predict(scaled[numpy.newaxis, ...], verbose=0)
        prediction = float(output[0][0])

        if prediction > 0.5:
            self.buy()
        else:
            self.sell()
```
```python=
# Backtest the SmaCross strategy on df with 600000 starting cash and a
# commission setting of 0.002.
# Note: this rebinds `test`, which earlier held the scaled test frame.
backtest_settings = {'cash': 600000, 'commission': .002}
test = Backtest(df, SmaCross, **backtest_settings)
test.run()
```
