# 雙能肺炎檢測
## 方法一、減少資料直到類別平衡
1. 修改成以下程式碼產生資料集的DataFrame:
```
# Method 1: undersample the Pneumonia-positive class.
# Rows are visited in label_df order; only the first MAX_POSITIVE_SAMPLES
# positive rows whose image exists are kept, while every negative row whose
# image exists is kept.
MAX_POSITIVE_SAMPLES = 203  # presumably the number of negative samples -- TODO confirm

data = []  # (filename, Nodule label, Pneumonia label) tuples
k = 0      # Pneumonia-positive rows seen so far (with an existing image)
for i in range(len(label_df)):
    row = label_df.iloc[i]

    # Use only rows whose Pneumonia label is exactly 0 or 1.
    if row["Pneumonia"] not in (0, 1):
        continue

    # Skip rows whose source image is missing on disk.
    target = os.path.join(image_folder, row["foldername"], image_type)
    if not os.path.exists(target):
        continue

    filename = row["foldername"] + ".png"
    if row["Pneumonia"] == 1:
        k += 1
        if k > MAX_POSITIVE_SAMPLES:
            # Positive quota already filled: drop the surplus sample.
            continue

    data.append((filename, row["Nodule"], row["Pneumonia"]))
    # The image export is intentionally left disabled in this method:
    # copyfile(target, os.path.join(export_folder, filename))
```
2. 修改拆分訓練集與驗證集的seed (42->40):
```
# Split the dataset DataFrame into a training part and a validation part.
train_list, val_list = train_test_split(
    df,               # DataFrame to be split
    test_size=0.2,    # held-out fraction; 8:2 is only an example, adjust freely
    random_state=40,  # fixed seed so the split is reproducible
    shuffle=True,
)
```

## 方法二、對少量類別的資料進行資料增強 (如水平翻轉)
1. 請先上傳libs.py至環境中,libs.py已上傳至Trello中
```
import libs as L
```
2. 修改成以下程式碼產生資料集的DataFrame:
```
# Method 2: augment the Pneumonia-negative class with horizontally flipped
# copies. Every usable row contributes one image produced by
# L.mask_lung_generator; the first MAX_AUGMENTED_NEGATIVES negative rows also
# contribute a flipped copy saved as "<foldername>_2.png".
MAX_AUGMENTED_NEGATIVES = 191  # presumably the gap between the class counts -- TODO confirm

data = []  # (filename, Nodule label, Pneumonia label) tuples
k = 0      # flipped negative images written so far
for i in range(len(label_df)):
    row = label_df.iloc[i]

    # Use only rows whose Pneumonia label is exactly 0 or 1.
    if row["Pneumonia"] not in (0, 1):
        continue

    # Skip rows whose source image is missing on disk.
    target = os.path.join(image_folder, row["foldername"], image_type)
    if not os.path.exists(target):
        continue

    # Image generated by the libs.py helper from the source image and its
    # mask JSON (presumably the lung region only -- see libs.py).
    filename = row["foldername"] + ".png"
    jsonPath = os.path.join(mask_folder, row["foldername"] + '.json')
    image = L.mask_lung_generator(target, jsonPath)
    cv.imwrite(os.path.join(export_folder, filename), image)
    data.append((filename, row["Nodule"], row["Pneumonia"]))

    if row["Pneumonia"] == 0 and k < MAX_AUGMENTED_NEGATIVES:
        # Flip along axis 1 (left-right) and store it as an extra sample.
        image = np.flip(image, axis=1)
        filename = row["foldername"] + "_2.png"
        cv.imwrite(os.path.join(export_folder, filename), image)
        data.append((filename, row["Nodule"], row["Pneumonia"]))
        k = k + 1
```
3. 修改拆分訓練集與驗證集的seed (43):
```
# Split the dataset DataFrame into a training part and a validation part.
train_list, val_list = train_test_split(
    df,               # DataFrame to be split
    test_size=0.2,    # held-out fraction; 8:2 is only an example, adjust freely
    random_state=43,  # fixed seed so the split is reproducible
    shuffle=True,
)
```
## 方法三、原圖+肺部區域圖+資料增強
1. 修改成以下程式碼產生資料集的DataFrame
```
# Method 3: original image + generated lung image + augmentation.
# Every usable row contributes two samples: the image produced by
# L.mask_lung_generator ("<foldername>.png") and an unchanged copy of the
# source image ("<foldername>_ori.png"). The first MAX_AUGMENTED_NEGATIVES
# Pneumonia-negative rows contribute two more: a left-right flipped generated
# image ("_2.png") and an intensity-inverted source image ("_ori2.png").
MAX_AUGMENTED_NEGATIVES = 191  # presumably the gap between the class counts -- TODO confirm

data = []  # (filename, Nodule label, Pneumonia label) tuples
k = 0      # negative rows augmented so far
for i in range(len(label_df)):
    row = label_df.iloc[i]

    # Use only rows whose Pneumonia label is exactly 0 or 1.
    if row["Pneumonia"] not in (0, 1):
        continue

    # Skip rows whose source image is missing on disk.
    target = os.path.join(image_folder, row["foldername"], image_type)
    if not os.path.exists(target):
        continue

    labels = (row["Nodule"], row["Pneumonia"])

    # Image generated by the libs.py helper from the source image and its
    # mask JSON (presumably the lung region only -- see libs.py).
    filename = row["foldername"] + ".png"
    jsonPath = os.path.join(mask_folder, row["foldername"] + '.json')
    image = L.mask_lung_generator(target, jsonPath)
    cv.imwrite(os.path.join(export_folder, filename), image)
    data.append((filename, *labels))

    # Unmodified copy of the source image.
    filename = row["foldername"] + "_ori.png"
    copyfile(target, os.path.join(export_folder, filename))
    data.append((filename, *labels))

    if row["Pneumonia"] == 0 and k < MAX_AUGMENTED_NEGATIVES:
        # Augmentation 1: generated image flipped along axis 1 (left-right).
        image = np.flip(image, axis=1)
        filename = row["foldername"] + "_2.png"
        cv.imwrite(os.path.join(export_folder, filename), image)
        data.append((filename, *labels))

        # Augmentation 2: source image with inverted intensities
        # (max - pixel), computed in float and cast back to uint8.
        filename = row["foldername"] + "_ori2.png"
        image = cv.imread(target).astype(float)
        image = (np.max(image) - image).astype('uint8')
        cv.imwrite(os.path.join(export_folder, filename), image)
        data.append((filename, *labels))

        k = k + 1
```
2. 修改拆分訓練集與驗證集的seed (60):
```
# Split the dataset DataFrame into a training part and a validation part.
train_list, val_list = train_test_split(
    df,               # DataFrame to be split
    test_size=0.1,    # held-out fraction; the ratio is only an example, adjust freely
    random_state=60,  # fixed seed so the split is reproducible
    shuffle=True,
)
```