# 雙能肺炎檢測

## 方法一、減少資料直到類別平衡

1. 修改成以下程式碼產生資料集的DataFrame:

```python
data = []
k = 0  # Pneumonia == 1 rows seen so far whose image exists on disk
for i in range(label_df.shape[0]):
    row = label_df.iloc[i]

    # Keep only rows whose Pneumonia label is exactly 0 or 1.
    # NOTE(review): the note that used to sit on this condition said it was
    # changed from ["Pneumonia"] == 1 to ["Nodule"] == 0, but the code tests
    # ["Pneumonia"] == 0 -- confirm which column is intended.
    if not (row["Pneumonia"] == 1 or row["Pneumonia"] == 0):
        continue

    # Rows whose source image is missing are skipped.
    target = os.path.join(image_folder, row["foldername"], image_type)
    if not os.path.exists(target):
        continue

    filename = row["foldername"] + ".png"
    if row["Pneumonia"] == 1:
        # Undersampling: only the first 203 Pneumonia == 1 rows are kept.
        k += 1
        if k <= 203:
            data.append((filename, row["Nodule"], row["Pneumonia"]))
    else:
        data.append((filename, row["Nodule"], row["Pneumonia"]))
    # Image export is disabled in this variant:
    # copyfile(target, os.path.join(export_folder, filename))
```

2. 修改拆分訓練集與驗證集的seed (42->40):

```python
train_list, val_list = train_test_split(
    df,               # DataFrame to split
    test_size=0.2,    # adjustable; the 8:2 ratio is only an example
    random_state=40,
    shuffle=True,
)
```

![結果](https://i.imgur.com/sYhvb4K.png)

## 方法二、對少量類別的資料進行資料增強 (如水平翻轉)

1. 請先上傳libs.py至環境中,libs.py已上傳至Trello中

```python
import libs as L
```

2. 
修改成以下程式碼產生資料集的DataFrame:

```python
data = []
k = 0  # number of horizontally flipped copies written so far
for i in range(label_df.shape[0]):
    row = label_df.iloc[i]

    # Keep only rows whose Pneumonia label is exactly 0 or 1.
    # NOTE(review): the note that used to sit on this condition said it was
    # changed from ["Pneumonia"] == 1 to ["Nodule"] == 0, but the code tests
    # ["Pneumonia"] == 0 -- confirm which column is intended.
    if not (row["Pneumonia"] == 1 or row["Pneumonia"] == 0):
        continue

    # Rows whose source image is missing are skipped.
    target = os.path.join(image_folder, row["foldername"], image_type)
    if not os.path.exists(target):
        continue

    # Write the image produced by libs.mask_lung_generator and record it.
    # Presumably this is the lung-region image described by the JSON mask
    # file -- confirm in libs.py.
    filename = row["foldername"] + ".png"
    jsonPath = os.path.join(mask_folder, row["foldername"] + '.json')
    image = L.mask_lung_generator(target, jsonPath)
    cv.imwrite(os.path.join(export_folder, filename), image)
    data.append((filename, row["Nodule"], row["Pneumonia"]))

    # Augmentation: the first 191 Pneumonia == 0 rows also get a
    # horizontally flipped copy stored as "<foldername>_2.png".
    if row["Pneumonia"] == 0 and k < 191:
        image = np.flip(image, axis=1)
        filename = row["foldername"] + "_2.png"
        cv.imwrite(os.path.join(export_folder, filename), image)
        data.append((filename, row["Nodule"], row["Pneumonia"]))
        k = k + 1
```

3. 修改拆分訓練集與驗證集的seed (43):

```python
# NOTE(review): if df is built from `data` above, an image and its flipped
# "_2" copy can end up on opposite sides of this random split -- confirm
# whether that leakage is acceptable for the reported validation score.
train_list, val_list = train_test_split(
    df,               # DataFrame to split
    test_size=0.2,    # adjustable; the 8:2 ratio is only an example
    random_state=43,
    shuffle=True,
)
```

## 方法三、原圖+肺部區域圖+資料增強

1. 
修改成以下程式碼產生資料集的DataFrame:

```python
data = []
k = 0  # number of Pneumonia == 0 rows that have been augmented so far
for i in range(label_df.shape[0]):
    row = label_df.iloc[i]

    # Keep only rows whose Pneumonia label is exactly 0 or 1.
    # NOTE(review): the note that used to sit on this condition said it was
    # changed from ["Pneumonia"] == 1 to ["Nodule"] == 0, but the code tests
    # ["Pneumonia"] == 0 -- confirm which column is intended.
    if not (row["Pneumonia"] == 1 or row["Pneumonia"] == 0):
        continue

    # Rows whose source image is missing are skipped.
    target = os.path.join(image_folder, row["foldername"], image_type)
    if not os.path.exists(target):
        continue

    labels = (row["Nodule"], row["Pneumonia"])

    # 1) Image produced by libs.mask_lung_generator -> "<foldername>.png".
    #    Presumably the lung-region image described by the JSON mask file
    #    -- confirm in libs.py.
    filename = row["foldername"] + ".png"
    jsonPath = os.path.join(mask_folder, row["foldername"] + '.json')
    masked = L.mask_lung_generator(target, jsonPath)
    cv.imwrite(os.path.join(export_folder, filename), masked)
    data.append((filename,) + labels)

    # 2) Unmodified source image, copied as "<foldername>_ori.png".
    filename = row["foldername"] + "_ori.png"
    copyfile(target, os.path.join(export_folder, filename))
    data.append((filename,) + labels)

    # Augmentation: the first 191 Pneumonia == 0 rows get two extra images.
    if row["Pneumonia"] == 0 and k < 191:
        # 3) Horizontally flipped masked image -> "<foldername>_2.png".
        filename = row["foldername"] + "_2.png"
        cv.imwrite(os.path.join(export_folder, filename), np.flip(masked, axis=1))
        data.append((filename,) + labels)

        # 4) Intensity-inverted source image (max - pixel), not a flip
        #    -> "<foldername>_ori2.png". The arithmetic is done in float and
        #    cast back to uint8 before writing.
        filename = row["foldername"] + "_ori2.png"
        original = cv.imread(target).astype(float)
        inverted = (-original + np.max(original)).astype('uint8')
        cv.imwrite(os.path.join(export_folder, filename), inverted)
        data.append((filename,) + labels)

        k = k + 1
```

2. 修改拆分訓練集與驗證集的seed (60):

```python
# NOTE(review): if df is built from `data` above, the "_ori", "_2" and
# "_ori2" variants of one source image can end up on opposite sides of this
# random split -- confirm whether that leakage is acceptable for the
# reported validation score.
train_list, val_list = train_test_split(
    df,               # DataFrame to split
    test_size=0.1,    # adjustable; the 8:2 ratio is only an example
    random_state=60,
    shuffle=True,
)
```