###### tags: `clustering`
[toc]
# fuzzy c-means
- 原理和 k-means 類似,但每個點不是只被歸到單一群心,而是同時以「隸屬度」$u_{ij}\in[0,1]$ 屬於每一個群心,且同一個點對所有群心的隸屬度總和為 1。
> ex:
> 群心:A、B、C,原始數據點:a , 第一次分類a有可能會被歸類在A或是B,第二次分類有可能分類在A。
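## 公式
- 群心(以隸屬度的 $m$ 次方為權重的加權平均):
$$c_j=\frac{\sum_{i=1}^{N}u_{ij}^{m}\,x_i}{\sum_{i=1}^{N}u_{ij}^{m}}$$
- 隸屬度($d_{ij}=\lVert x_i-c_j\rVert$ 為點 $x_i$ 到群心 $c_j$ 的歐氏距離,$K$ 為群數):
$$u_{ij}=\frac{1}{\sum_{k=1}^{K}\left(\frac{d_{ij}}{d_{ik}}\right)^{\frac{2}{m-1}}}$$
- $m>1$ 為模糊係數;當前後兩次迭代的所有 $u_{ij}$ 變化量都不超過 $10^{-8}$ 時停止。
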
## 程式碼
```python=
import copy
import math
import random
import matplotlib.pyplot as plt
# Upper bound of the random integers drawn when seeding the membership matrix.
# (A `global` statement is only meaningful inside a function, so none is
# needed for this module-level assignment.)
MAX = 10000.0
def draw(U, cluster_number, C):
    """Plot the membership profile of every data point in its own subplot.

    Args:
        U: Membership matrix; ``U[i][j]`` is the degree to which data point
            ``i`` belongs to cluster ``j``.
        cluster_number: Number of clusters; used for the x-axis range and
            tick marks.
        C: Cluster centers. Accepted for interface compatibility; it is not
            drawn.
    """
    count = len(U)
    # Keep the original 4x4 layout for up to 16 points, and grow the grid for
    # larger inputs so add_subplot never receives an out-of-range index.
    columns = max(4, math.ceil(math.sqrt(count)))
    rows = max(4, math.ceil(count / columns))

    fig = plt.figure(figsize=(15, 10))
    for index, memberships in enumerate(U):
        fig.add_subplot(rows, columns, index + 1)
        plt.title("data = " + str(index))
        plt.xlim([0, cluster_number - 1])  # x-axis limits
        plt.ylim([0, 1])  # y-axis limits (memberships lie in [0, 1])
        plt.xticks(range(cluster_number))  # one tick per cluster
        plt.xlabel("center", fontsize=14, labelpad=5)
        plt.ylabel("uij", fontsize=14, labelpad=5)
        plt.plot(memberships)
    fig.tight_layout()
    plt.show()
def print_matrix(list):
    """Print every row of a matrix on its own line.

    Args:
        list: Sequence of rows to print. The name shadows the builtin but is
            kept so existing keyword callers continue to work.
    """
    for row in list:
        print(row)
def initialize_U(data, cluster_number):
    """Build a random initial membership matrix.

    For every data point, ``cluster_number`` random integers in
    ``[1, int(MAX)]`` are drawn and divided by their sum, so each row adds up
    to 1 (up to floating-point rounding).

    Args:
        data: The data points; only the number of points is used.
        cluster_number: Number of clusters (columns of the matrix).

    Returns:
        A list with one row per data point, each row holding
        ``cluster_number`` membership values.
    """
    U = []
    for _ in data:
        draws = [random.randint(1, int(MAX)) for _ in range(cluster_number)]
        total = sum(draws)
        U.append([value / total for value in draws])
    return U
def distance(point, center):
    """Return the Euclidean distance between ``point`` and ``center``.

    Only the first ``len(point)`` coordinates of ``center`` are read.
    """
    squared_total = 0
    for axis, coordinate in enumerate(point):
        delta = coordinate - center[axis]
        squared_total += delta ** 2
    return math.sqrt(squared_total)
def end_conditon(U, U_old):
    """Tell whether the membership matrix has stopped changing.

    Args:
        U: Current membership matrix.
        U_old: Membership matrix from the previous iteration.

    Returns:
        True when no entry differs from its previous value by more than
        1e-8, False otherwise. The width of the first row of ``U`` is used
        for every row.
    """
    tolerance = 1e-8
    return not any(
        abs(U[i][j] - U_old[i][j]) > tolerance
        for i in range(len(U))
        for j in range(len(U[0]))
    )
def _cluster_centers(data, U, cluster_number, m, previous=None):
    """Return each cluster's center as the u**m-weighted mean of the data."""
    centers = []
    for j in range(cluster_number):
        # The weights u_kj ** m are the same for every coordinate of center
        # j, so compute them (and their sum) once per cluster.
        weights = [row[j] ** m for row in U]
        weight_total = 0
        for weight in weights:
            weight_total += weight
        if weight_total == 0 and previous is not None:
            # No point belongs to this cluster any more (every point sits
            # exactly on another center); keep the center where it was
            # instead of dividing by zero.
            centers.append(previous[j])
            continue
        center = []
        for axis in range(len(data[0])):
            weighted_sum = 0
            for k, weight in enumerate(weights):
                weighted_sum += weight * data[k][axis]
            center.append(weighted_sum / weight_total)
        centers.append(center)
    return centers


def _update_memberships(U, dis, cluster_number, m):
    """Overwrite U in place with memberships derived from the distances."""
    exponent = 2 / (m - 1)
    for i, row in enumerate(dis):
        coincident = [j for j in range(cluster_number) if row[j] == 0]
        if coincident:
            # The point lies exactly on one or more centers. The regular
            # formula would divide by zero, so share full membership equally
            # among those centers and give every other cluster zero.
            share = 1 / len(coincident)
            for j in range(cluster_number):
                U[i][j] = share if row[j] == 0 else 0.0
            continue
        for j in range(cluster_number):
            denominator = 0
            for k in range(cluster_number):
                denominator += (row[j] / row[k]) ** exponent
            U[i][j] = 1 / denominator


def fuzzy(data, cluster_number, m):
    """Run fuzzy c-means on ``data``, then print and plot the memberships.

    Starting from a random membership matrix, the cluster centers and the
    memberships are updated alternately until no membership changes by more
    than the tolerance checked in ``end_conditon``.

    Args:
        data: List of points, each a list of coordinates of equal length.
        cluster_number: Number of clusters to form.
        m: Fuzzifier; must be greater than 1 because the membership update
            raises distance ratios to the power ``2 / (m - 1)``.

    Returns:
        A tuple ``(U, C)`` with the final membership matrix and the cluster
        centers.

    Raises:
        ValueError: If ``m`` is not greater than 1.
    """
    if m <= 1:
        raise ValueError("fuzzy coefficient m must be greater than 1")

    U = initialize_U(data, cluster_number)
    C = None
    while True:
        # Snapshot of the memberships, used for the convergence check below.
        U_old = copy.deepcopy(U)
        # Recompute the cluster centers from the current memberships.
        C = _cluster_centers(data, U, cluster_number, m, C)
        # Distance from every point to every center.
        dis = [[distance(point, center) for center in C] for point in data]
        # Refresh the memberships from those distances.
        _update_memberships(U, dis, cluster_number, m)
        if end_conditon(U, U_old):
            print("已完成聚類")
            break
    print("聚類結果")
    print_matrix(U)
    draw(U, cluster_number, C)
    return U, C
if __name__ == '__main__':
    # Sample 2-D points used for the demonstration run.
    data = [[-39,32],[-22,38],[-40,23],[-33,37],[-28,37],[-5,-1],[12,1],[9,-11],[26,31],[28,25],[35,30],[37,23],[29,29],[0,0]]
    # The fuzzy coefficient does not have to be an integer (e.g. 1.5), so
    # parse it as a float; integer input such as "2" gives the same results
    # as before.
    m = float(input("請輸入模糊係數:"))
    # Cluster the sample points into three groups.
    fuzzy(data, 3, m)
```