```python=
'''step
1.產生100個亂數x
2.依區間劃分,每20個x產生對應的y
3.將x,y繪製成X: sample weight, Y: concentration的表格
4.將sample weight,concentration繪製成散佈圖
5.繪製回歸直線'''
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
# Half-open [low, high) interval sampled for each consecutive group of 20
# sample weights; the groups are drawn in exactly this order.
_WEIGHT_RANGES = [(0, 50), (50, 100), (25, 75), (10, 90), (0, 75)]

# (slope, offset) applied to the matching group: y = slope * x + offset.
_GROUP_LINES = [(2, 0), (2, 10), (3, 0), (3, -50), (5, -100)]

# 100 random integer sample weights, 20 per interval.
X = np.concatenate([
    np.random.randint(low, high, size=20) for low, high in _WEIGHT_RANGES
])

# Concentrations: every block of 20 weights gets its own linear rule.
Y = np.concatenate([
    group * slope + offset
    for group, (slope, offset) in zip(np.split(X, 5), _GROUP_LINES)
])

# Two-column table (X: sample weight, Y: concentration), one row per sample.
test = {'sample weight': X, 'concentration': Y}
product = pd.DataFrame(test)
# --- Linear fit and goodness-of-fit statistics ---------------------------
# Degree-1 least-squares fit of Y against X; np.polyfit returns the
# coefficients highest power first, i.e. (slope, intercept).
coefficients = np.polyfit(X, Y, 1)
m, b = coefficients
Y_pred = m * X + b  # fitted concentration for every sample weight

# Pearson correlation coefficient r: off-diagonal entry of the 2x2
# correlation matrix; squaring it gives r^2 directly.
r = np.corrcoef(X, Y)[0, 1]
r_squared = r ** 2

# Coefficient of determination computed through scikit-learn as a
# cross-check; this step is optional and is expected to agree with r ** 2.
r_squared2 = r2_score(Y, Y_pred)

# --- Figure --------------------------------------------------------------
# X and Y are the same arrays stored in test['sample weight'] and
# test['concentration'].
plt.scatter(X, Y, color='blue', label='Data Points')
plt.plot(
    X,
    Y_pred,
    color='red',
    label=f'Regression Line: Y = {m:.3f}X + {b:.3f}',  # 3 decimal places
)

plt.title('product test')
plt.xlabel('sample weight')
plt.ylabel('concentration')

# Statistic annotations, all placed at x = 80 in data coordinates.
for y_pos, caption, colour in (
    (50, f'r = {r:.3f}', 'green'),
    (100, f'r^2 = {r_squared:.3f}', 'orange'),
    (125, f'r^2_2 = {r_squared2:.3f}', 'orange'),
):
    plt.text(80, y_pos, caption, fontsize=11, color=colour)

plt.legend()
plt.grid(True)
plt.show()
```