# Upload the packet dataset

# Shared dataset link (Phishing_BestFirst):
https://drive.google.com/file/d/1POrqZgo3bZTR6gBE7JBuXcD-4QXpZxOS/view?usp=drive_link
# Code
```python=1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from google.colab import drive
drive.mount('/content/gdrive')
# Read the dataset from Google Drive
file = 'gdrive/My Drive/Phishing_BestFirst.csv'
df = pd.read_csv(file)
# Check for missing values and report the dataset shape
print(df.isnull().values.sum())
print(df.shape)
```
# Run the code and sign in to Google Drive to grant access

Displayed result

# Dataset features and class column
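Before preprocessing, it helps to list the columns and the class balance. This is a minimal sketch, assuming the `df` loaded above:
```python=1
# Inspect the feature columns and the class distribution
print(df.columns.tolist())         # every column name, including the 'class' label
print(df['class'].value_counts())  # sample count per class
```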

# Data processing
```python=1
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
# Data preprocessing: encode the class label and standardize the features
LE = LabelEncoder()
df['class'] = LE.fit_transform(df['class'])
x = np.asarray(df.drop(['class'], axis=1))
scaler = StandardScaler()
x = scaler.fit_transform(x)
y = np.asarray(df['class'])
# Split into training and test sets (50/50)
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.5, random_state=42)
print(train_x.shape)
print(test_x.shape)
print(df)
```
Split result
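A quick check, not part of the original output, that the 50/50 split keeps both classes represented in each half:
```python=1
# Count the labels in the training and test halves
print(np.unique(train_y, return_counts=True))
print(np.unique(test_y, return_counts=True))
```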


# SVM model
```python=1
from sklearn import svm
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
# Linear-kernel SVM
model = svm.SVC(kernel='linear')
model.fit(train_x, train_y)
pred_y = model.predict(test_x)
print(model.score(test_x, test_y))
print(confusion_matrix(test_y, pred_y))
# Polynomial-kernel SVM
model = svm.SVC(kernel='poly')
model.fit(train_x, train_y)
pred_y = model.predict(test_x)
print(model.score(test_x, test_y))
print(confusion_matrix(test_y, pred_y))
```
Result
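Besides the linear and polynomial kernels above, an RBF kernel can be compared the same way; this is a sketch with default parameters, not part of the original run:
```python=1
# RBF-kernel SVM, evaluated like the two kernels above
rbf_model = svm.SVC(kernel='rbf')
rbf_model.fit(train_x, train_y)
print(rbf_model.score(test_x, test_y))
print(confusion_matrix(test_y, rbf_model.predict(test_x)))
```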

# XGBoost model
```python=1
import xgboost as xgb
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
# Build the XGBoost model
xgb_model = xgb.XGBClassifier()
xgb_model.fit(train_x, train_y)
# Evaluate the XGBoost model on the test set
xgb_pred_y = xgb_model.predict(test_x)
xgb_accuracy = xgb_model.score(test_x, test_y)
xgb_conf_matrix = confusion_matrix(test_y, xgb_pred_y)
# Print the XGBoost evaluation results
print("XGBoost accuracy:", xgb_accuracy)
print("XGBoost confusion matrix:\n", xgb_conf_matrix)
```
Result
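To see which features the XGBoost model relies on, the scikit-learn wrapper's `feature_importances_` attribute can be paired with the column names; a sketch, assuming the `df` and `xgb_model` defined above:
```python=1
# Rank the features by XGBoost importance (top 10 shown)
feature_names = df.drop(['class'], axis=1).columns
ranked = sorted(zip(feature_names, xgb_model.feature_importances_),
                key=lambda t: t[1], reverse=True)
for name, imp in ranked[:10]:
    print(name, imp)
```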

# ANN model
```python=1
from sklearn.metrics import accuracy_score, confusion_matrix
from keras.models import Sequential
from keras.layers import Dense
# Build the ANN model: two hidden ReLU layers and a sigmoid output
model = Sequential()
model.add(Dense(64, input_dim=train_x.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train the model
model.fit(train_x, train_y, epochs=10, batch_size=32, verbose=1)
# Evaluate on the test set: threshold the sigmoid outputs at 0.5
predictions = (model.predict(test_x) > 0.5).astype(int).ravel()
# Compute accuracy and the confusion matrix
accuracy = accuracy_score(test_y, predictions)
conf_matrix = confusion_matrix(test_y, predictions)
print("Accuracy with ANN model:", accuracy)
print("Confusion matrix with ANN model:")
print(conf_matrix)
```
Result
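The per-epoch training curves can be plotted with the matplotlib import from the first block. This sketch assumes the training call above is captured as `history = model.fit(...)` and a recent Keras that records the metric under the key 'accuracy':
```python=1
# Plot the loss and accuracy Keras recorded during training
# (assumes: history = model.fit(train_x, train_y, epochs=10, batch_size=32, verbose=1))
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['accuracy'], label='accuracy')
plt.xlabel('epoch')
plt.legend()
plt.show()
```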

# Weights
```python=1
# Get the weight arrays (kernels and biases, layer by layer)
weights = model.get_weights()
print("Weights:")
for i, w in enumerate(weights):
    print("Layer", i+1, "weights shape:", w.shape)
# Set weights (example only; the actual values depend on your needs)
# model.set_weights(new_weights)
```
Result
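As a sanity check on the weight list (a small sketch, not in the original notebook), the element counts of all arrays should add up to Keras' own parameter count:
```python=1
# Total parameter count from the weight arrays vs. model.count_params()
total = sum(int(np.prod(w.shape)) for w in weights)
print("Total parameters:", total)
print("model.count_params():", model.count_params())
```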

# Biases
```python=1
# Extract the bias parameters (the 1-D arrays in the weight list)
biases = [w for w in weights if len(w.shape) == 1]
# Display the bias parameters
for i, bias in enumerate(biases):
    print(f"Bias for layer {i+1}: {bias}")
```
Result
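Filtering by array rank works here because every 1-D array returned by `get_weights()` is a bias. An alternative that ties each bias to its layer explicitly uses the standard per-layer call (a sketch):
```python=1
# Each Dense layer returns [kernel, bias] from get_weights()
for i, layer in enumerate(model.layers):
    kernel, bias = layer.get_weights()
    print(f"Layer {i+1}: kernel {kernel.shape}, bias {bias.shape}")
```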

# Recall
```python=1
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
# Evaluate the ANN on the test set: threshold the sigmoid outputs at 0.5
predictions = (model.predict(test_x) > 0.5).astype(int).ravel()
# Confusion matrix
conf_matrix = confusion_matrix(test_y, predictions)
# Accuracy
accuracy = accuracy_score(test_y, predictions)
# Precision
precision = precision_score(test_y, predictions)
# Recall
recall = recall_score(test_y, predictions)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
```
Result
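Precision and recall can be combined into a single F1 score, and `classification_report` prints all three per class; a small extension of the metrics above:
```python=1
from sklearn.metrics import f1_score, classification_report
# F1 is the harmonic mean of precision and recall; the report shows all metrics per class
print("F1:", f1_score(test_y, predictions))
print(classification_report(test_y, predictions))
```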
