# ML - linear model
###### tags: `machine learning`
```
#!/usr/bin/env python
import mglearn
import numpy as np
import matplotlib.pyplot as plt
#===============================================================================
# Wave dataset
mglearn.plots.plot_linear_regression_wave()
plt.tight_layout()
plt.show()
plt.close()
```
#### output
```
w[0]: 0.393906 b: -0.031804
```
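The two printed numbers are the slope and intercept of the fitted line. A minimal sketch of a prediction using the values above (the input value is hypothetical):
```
# Sketch: with one input feature, the learned model is a straight line
# y_hat = w[0] * x[0] + b, using the values printed above.
w0, b = 0.393906, -0.031804
x0 = 1.5                       # hypothetical input value
print(w0 * x0 + b)             # predicted target for x0
```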

### Linear regression (ordinary least squares)
```
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
X, y = mglearn.datasets.make_wave(n_samples=60)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
lr = LinearRegression().fit(X_train, y_train)
print("lr.coef_: {}".format(lr.coef_))
print("lr.intercept_: {}".format(lr.intercept_))
print("Training set score: {:.2f}".format(lr.score(X_train, y_train)))
print("Test set score: {:.2f}".format(lr.score(X_test, y_test)))
```
#### output
```
lr.coef_: [0.39390555]
lr.intercept_: -0.031804343026759746
Training set score: 0.67
Test set score: 0.66
```
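`coef_` and `intercept_` are the learned w and b; a quick sketch (using the variables from the cell above) to confirm that `predict()` is just the linear formula applied row-wise:
```
# Sketch: predict() computes w.x + b for each row of X_test.
import numpy as np
manual = X_test @ lr.coef_ + lr.intercept_
print(np.allclose(manual, lr.predict(X_test)))   # expected: True
```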
### Linear regression on the extended Boston Housing dataset
```
X, y = mglearn.datasets.load_extended_boston()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
lr = LinearRegression().fit(X_train, y_train)
print("Training set score: {:.2f}".format(lr.score(X_train, y_train)))
print("Test set score: {:.2f}".format(lr.score(X_test, y_test)))
```
#### output
```
Training set score: 0.95
Test set score: 0.61
```
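The large gap between training and test R^2 points to overfitting on the many derived features. A hedged sketch (same X, y as above) using cross-validation for a less split-dependent estimate:
```
# Sketch: cross-validation averages over several train/test splits.
from sklearn.model_selection import cross_val_score
print("X shape: {}".format(X.shape))                  # many engineered features
print(cross_val_score(LinearRegression(), X, y, cv=5))
```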
### Ridge regression (L2 regularization)
```
from sklearn.linear_model import Ridge
ridge = Ridge().fit(X_train, y_train)
print("Training set score: {:.2f}".format(ridge.score(X_train, y_train)))
print("Test set score: {:.2f}".format(ridge.score(X_test, y_test)))
print("ridge.coef_:\n{}".format(ridge.coef_))
print("ridge.intercept_: {}".format(ridge.intercept_))
ridge10 = Ridge(alpha=10).fit(X_train, y_train)
print("Training set score: {:.2f}".format(ridge10.score(X_train, y_train)))
print("Test set score: {:.2f}".format(ridge10.score(X_test, y_test)))
ridge01 = Ridge(alpha=0.1).fit(X_train, y_train)
print("Training set score: {:.2f}".format(ridge01.score(X_train, y_train)))
print("Test set score: {:.2f}".format(ridge01.score(X_test, y_test)))
plt.plot(ridge.coef_, 's', label="Ridge alpha=1")
plt.plot(ridge10.coef_, '^', label="Ridge alpha=10")
plt.plot(ridge01.coef_, 'v', label="Ridge alpha=0.1")
plt.plot(lr.coef_, 'o', label="LinearRegression")
plt.xlabel("Coefficient index")
plt.ylabel("Coefficient magnitude")
plt.hlines(0, 0, len(lr.coef_))
plt.ylim(-25, 25)
plt.legend()
plt.tight_layout()
plt.show()
plt.close()
mglearn.plots.plot_ridge_n_samples()
plt.tight_layout()
plt.show()
plt.close()
```
#### output
```
Training set score: 0.89
Test set score: 0.75
ridge.coef_:
[-1.41368408e+00 -1.55661895e+00 -1.46543409e+00 -1.26616071e-01
-7.91193605e-02 8.33161023e+00 2.54975060e-01 -4.94145701e+00
3.89862268e+00 -1.05866058e+00 -1.58433734e+00 1.05103856e+00
-4.01220799e+00 3.33720475e-01 3.64725471e-03 -8.49295793e-01
7.44989267e-01 -1.43106836e+00 -1.62981017e+00 -1.40486294e+00
-4.47314366e-02 -1.74619880e+00 -1.46715888e+00 -1.33237111e+00
-1.69154625e+00 -5.06179637e-01 2.62197591e+00 -2.09210002e+00
1.95074661e-01 -2.75469422e-01 5.11308202e+00 -1.67083739e+00
-9.81863179e-02 6.34477127e-01 -6.10008281e-01 4.01805897e-02
-1.27661999e+00 -2.91349679e+00 3.39544035e+00 7.91904036e-01
1.35260232e+00 -4.03661265e+00 2.32361734e+00 -3.36712926e+00
1.81279204e+00 3.01566897e+00 -1.89452070e+00 -2.50844073e-01
-2.89543735e+00 -1.26616071e-01 -5.00217192e+00 -2.43951806e+00
2.85071846e+00 -8.57081177e-01 2.99141960e+00 2.34589755e+00
1.31207081e+00 1.71845119e+00 -2.59766697e+00 -1.32370675e+00
-2.81242223e+00 -2.09117058e+00 -1.08428335e+00 -2.73843625e+00
-1.61989753e+00 -2.80493280e+00 9.44641482e-01 -1.65363374e+00
1.66553558e+01 -1.10980551e+00 2.14188605e+00 -8.03855387e+00
-8.59149928e+00 -7.54161099e+00 1.02924022e+01 -7.96425897e+00
7.68540742e-01 -1.85213002e+00 2.51497387e+00 -3.42074257e-01
-1.79604278e+00 -2.93048162e-01 -4.78242379e+00 8.63283317e-01
4.22361423e-01 -1.41656695e+00 -2.12023113e-01 -5.08121369e+00
-5.47247509e-01 1.53835390e+00 1.81348033e+00 1.97252021e+00
1.81849652e+00 -7.14338697e+00 1.10472533e+00 1.42242216e+00
-1.31494020e+00 -6.77170441e+00 1.82204476e+00 -2.36112444e+00
4.34670572e-02 1.20886000e+00 -6.32599163e+00 1.03600231e+01]
ridge.intercept_: 21.390525958609985
Training set score: 0.79
Test set score: 0.64
Training set score: 0.93
Test set score: 0.77
```
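A minimal sketch (reusing the split above) that sweeps `alpha`: larger values shrink the coefficients and lower training R^2, and the test score peaks somewhere in between. The alpha grid is an arbitrary choice, not tuned:
```
# Sketch: sweep the regularization strength and compare train/test R^2.
for alpha in [0.01, 0.1, 1, 10, 100]:
    r = Ridge(alpha=alpha).fit(X_train, y_train)
    print("alpha={:>6}: train {:.2f}, test {:.2f}".format(
        alpha, r.score(X_train, y_train), r.score(X_test, y_test)))
```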


### Lasso (L1 regularization)
```
from sklearn.linear_model import Lasso
lasso = Lasso().fit(X_train, y_train)
print("Training set score: {:.2f}".format(lasso.score(X_train, y_train)))
print("Test set score: {:.2f}".format(lasso.score(X_test, y_test)))
print("Number of features used: {}".format(np.sum(lasso.coef_ != 0)))
lasso001 = Lasso(alpha=0.01, max_iter=100000).fit(X_train, y_train)
print("Training set score: {:.2f}".format(lasso001.score(X_train, y_train)))
print("Test set score: {:.2f}".format(lasso001.score(X_test, y_test)))
print("Number of features used: {}".format(np.sum(lasso001.coef_ != 0)))
lasso00001 = Lasso(alpha=0.0001, max_iter=100000).fit(X_train, y_train)
print("Training set score: {:.2f}".format(lasso00001.score(X_train, y_train)))
print("Test set score: {:.2f}".format(lasso00001.score(X_test, y_test)))
print("Number of features used: {}".format(np.sum(lasso00001.coef_ != 0)))
plt.plot(lasso.coef_, 's', label="Lasso alpha=1")
plt.plot(lasso001.coef_, '^', label="Lasso alpha=0.01")
plt.plot(lasso00001.coef_, 'v', label="Lasso alpha=0.0001")
plt.plot(ridge01.coef_, 'o', label="Ridge alpha=0.1")
plt.legend(ncol=2, loc=(0, 1.05))
plt.ylim(-25, 25)
plt.xlabel("Coefficient index")
plt.ylabel("Coefficient magnitude")
plt.tight_layout()
plt.show()
plt.close()
```
#### output
```
Training set score: 0.29
Test set score: 0.21
Number of features used: 4
Training set score: 0.90
Test set score: 0.77
Number of features used: 33
Training set score: 0.95
Test set score: 0.64
Number of features used: 96
```
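scikit-learn's ElasticNet combines the L1 and L2 penalties. A hedged sketch with assumed (untuned) `alpha` and `l1_ratio` values, on the same split:
```
# Sketch: ElasticNet mixes L1 and L2; l1_ratio=1.0 is pure Lasso, 0.0 is pure Ridge.
from sklearn.linear_model import ElasticNet
en = ElasticNet(alpha=0.01, l1_ratio=0.5, max_iter=100000).fit(X_train, y_train)
print("Training set score: {:.2f}".format(en.score(X_train, y_train)))
print("Test set score: {:.2f}".format(en.score(X_test, y_test)))
print("Number of features used: {}".format(np.sum(en.coef_ != 0)))
```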

### Linear models for classification
```
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
X, y = mglearn.datasets.make_forge()
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
for model, ax in zip([LinearSVC(max_iter=5000), LogisticRegression()], axes):
    clf = model.fit(X, y)
    mglearn.plots.plot_2d_separator(clf, X, fill=False, eps=0.5,
                                    ax=ax, alpha=.7)
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
    ax.set_title("{}".format(clf.__class__.__name__))
    ax.set_xlabel("Feature 0")
    ax.set_ylabel("Feature 1")
axes[0].legend()
plt.tight_layout()
plt.show()
plt.close()
'''
Note: set max_iter=5000 so liblinear converges; with the default max_iter,
sklearn emits "ConvergenceWarning: Liblinear failed to converge, increase
the number of iterations."
'''
mglearn.plots.plot_linear_svc_regularization()
plt.tight_layout()
plt.show()
plt.close()
```
#### output
(figures only: decision boundaries of LinearSVC and LogisticRegression on the forge dataset, and the effect of the regularization parameter C in LinearSVC)
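A small follow-up sketch (using `clf`, the last model fitted in the loop above, and the forge `X`) showing that for a binary linear classifier the prediction is just the sign of w.x + b:
```
# Sketch: decision_function gives the signed distance to the separating line;
# predict() returns class 1 wherever that value is > 0.
print(clf.decision_function(X[:5]))
print(clf.predict(X[:5]))
```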


### LogisticRegression on Breast cancer dataset
```
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=42)
logreg = LogisticRegression(max_iter=5000).fit(X_train, y_train)
print("Training set score: {:.3f}".format(logreg.score(X_train, y_train)))
print("Test set score: {:.3f}".format(logreg.score(X_test, y_test)))
'''
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
Note: set max_iter=5000; without it, lbfgs stops early and sklearn warns to
increase the number of iterations (max_iter) or scale the data as shown in
https://scikit-learn.org/stable/modules/preprocessing.html
(alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression)
'''
logreg100 = LogisticRegression(C=100, max_iter=5000).fit(X_train, y_train)
print("Training set score: {:.3f}".format(logreg100.score(X_train, y_train)))
print("Test set score: {:.3f}".format(logreg100.score(X_test, y_test)))
'''
Note: set max_iter=5000 here as well; otherwise lbfgs raises
"ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT."
If the warning still appears, clear the cache folder.
'''
logreg001 = LogisticRegression(C=0.01, max_iter=5000).fit(X_train, y_train)
print("Training set score: {:.3f}".format(logreg001.score(X_train, y_train)))
print("Test set score: {:.3f}".format(logreg001.score(X_test, y_test)))
'''
Note: set max_iter=5000 here as well to avoid the same lbfgs ConvergenceWarning.
'''
plt.plot(logreg.coef_.T, 'o', label="C=1")
plt.plot(logreg100.coef_.T, '^', label="C=100")
plt.plot(logreg001.coef_.T, 'v', label="C=0.01")
plt.xticks(range(cancer.data.shape[1]), cancer.feature_names, rotation=90)
plt.hlines(0, 0, cancer.data.shape[1])
plt.ylim(-5, 5)
plt.xlabel("Coefficient index")
plt.ylabel("Coefficient magnitude")
plt.legend()
plt.tight_layout()
plt.show()
for C, marker in zip([0.001, 1, 100], ['o', '^', 'v']):
    lr_l1 = LogisticRegression(C=C, penalty="l1", solver='liblinear').fit(X_train, y_train)
    print("Training accuracy of l1 logreg with C={:.3f}: {:.2f}".format(
        C, lr_l1.score(X_train, y_train)))
    print("Test accuracy of l1 logreg with C={:.3f}: {:.2f}".format(
        C, lr_l1.score(X_test, y_test)))
    plt.plot(lr_l1.coef_.T, marker, label="C={:.3f}".format(C))
plt.xticks(range(cancer.data.shape[1]), cancer.feature_names, rotation=90)
plt.hlines(0, 0, cancer.data.shape[1])
plt.xlabel("Coefficient index")
plt.ylabel("Coefficient magnitude")
plt.ylim(-5, 5)
plt.legend(loc=3)
plt.tight_layout()
plt.show()
plt.close()
'''
Why solver='liblinear': with the default lbfgs solver, penalty="l1" raises
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
From https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html:
solver {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, default='lbfgs'
- For small datasets, 'liblinear' is a good choice; 'sag' and 'saga' are faster for large ones.
- For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs' handle the
  multinomial loss; 'liblinear' is limited to one-versus-rest schemes.
- 'newton-cg', 'lbfgs', 'sag' and 'saga' handle L2 or no penalty
- 'liblinear' and 'saga' also handle L1 penalty
- 'saga' also supports 'elasticnet' penalty
- 'liblinear' does not support setting penalty='none'
'''
```
#### output
```
Training set score: 0.958
Test set score: 0.958
Training set score: 0.981
Test set score: 0.965
Training set score: 0.953
Test set score: 0.951
```

```
Training accuracy of l1 logreg with C=0.001: 0.91
Test accuracy of l1 logreg with C=0.001: 0.92
Training accuracy of l1 logreg with C=1.000: 0.96
Test accuracy of l1 logreg with C=1.000: 0.96
Training accuracy of l1 logreg with C=100.000: 0.99
Test accuracy of l1 logreg with C=100.000: 0.98
```
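The convergence warnings above also suggest scaling the data. A hedged sketch (same breast-cancer split) using a StandardScaler pipeline, with which lbfgs typically converges without a large max_iter:
```
# Sketch: scale the features first, as the ConvergenceWarning suggests.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
scaled_logreg = make_pipeline(StandardScaler(), LogisticRegression(C=1))
scaled_logreg.fit(X_train, y_train)
print("Test set score (scaled): {:.3f}".format(scaled_logreg.score(X_test, y_test)))
```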

### Linear models for multiclass classification
```
from sklearn.datasets import make_blobs
X, y = make_blobs(random_state=42)
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
plt.legend(["Class 0", "Class 1", "Class 2"])
plt.tight_layout()
plt.show()
plt.close()
linear_svm = LinearSVC().fit(X, y)
print("Coefficient shape: ", linear_svm.coef_.shape)
print("Coefficient: \n", linear_svm.coef_)
print("Intercept shape: ", linear_svm.intercept_.shape)
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
line = np.linspace(-15, 15)
for coef, intercept, color in zip(linear_svm.coef_, linear_svm.intercept_, ['b', 'r', 'g']):
    plt.plot(line, -(line * coef[0] + intercept) / coef[1], c=color)
plt.ylim(-10, 15)
plt.xlim(-10, 8)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
plt.legend(['Class 0', 'Class 1', 'Class 2', 'Line class 0', 'Line class 1', 'Line class 2'], loc=(1.01, 0.3))
plt.tight_layout()
plt.show()
mglearn.plots.plot_2d_classification(linear_svm, X, fill=True, alpha=.7)
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
line = np.linspace(-15, 15)
for coef, intercept, color in zip(linear_svm.coef_, linear_svm.intercept_, ['b', 'r', 'g']):
    plt.plot(line, -(line * coef[0] + intercept) / coef[1], c=color)
plt.legend(['Class 0', 'Class 1', 'Class 2', 'Line class 0', 'Line class 1', 'Line class 2'], loc=(1.01, 0.3))
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
plt.tight_layout()
plt.show()
```
#### output

```
Coefficient shape: (3, 2)
Coefficient:
[[-0.17492678 0.23140053]
[ 0.47621399 -0.06937619]
[-0.18914124 -0.20400615]]
Intercept shape: (3,)
```
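`coef_` has shape (3, 2) because LinearSVC trains one binary classifier per class (one-vs-rest). A sketch (using `linear_svm` and `X` from above) showing that `predict()` picks the class with the largest per-class score:
```
# Sketch: one-vs-rest prediction = argmax over the three per-class scores.
scores = linear_svm.decision_function(X[:5])   # shape (5, 3)
print(scores.argmax(axis=1))
print(linear_svm.predict(X[:5]))               # should agree with the argmax
```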

