# Deep Learning 03: Custom Keras Components
###### tags: `Deep Learning`
## Loss function
When customizing a loss, simply passing a plain function to the model's compile() means the model will not save any parameters inside that function. A better approach is to subclass keras.losses.Loss:
[keras.losses.Loss](https://github.com/tensorflow/tensorflow/blob/v2.3.1/tensorflow/python/keras/losses.py#L47-L207)
```python=
class HuberLoss(keras.losses.Loss):
    def __init__(self, threshold=1.0, **kwargs):
        self.threshold = threshold
        super().__init__(**kwargs)
    # receives y_true and y_pred, returns the per-instance loss
    def call(self, y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < self.threshold
        squared_loss = tf.square(error) / 2
        linear_loss = self.threshold * tf.abs(error) - self.threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)
    # from_config() is called at load time with the parameters from get_config()
    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "threshold": self.threshold}
```
- call() receives the labels and predictions, computes all the per-instance losses, and returns them.
- **We override call() rather than \_\_call__() because Keras needs to perform extra operations inside \_\_call__(), and \_\_call__() itself invokes call(), so overriding call() is sufficient.**

- get_config() returns a dictionary mapping each hyperparameter name to its value; at load time, from_config() is called with the parameters returned by get_config():
```python=
model = keras.models.load_model("my_model_with_a_custom_loss_class.h5",
                                custom_objects={"HuberLoss": HuberLoss})
```
When the model is saved, the threshold is saved along with it: Keras calls the loss's get_config() method and stores the configuration as JSON inside the HDF5 file. When the model is loaded, Keras calls HuberLoss.from_config(), which passes \**config to the constructor.
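A minimal sketch of that round trip (the `model` variable and the optimizer choice are assumptions):
```python=
# the threshold is serialized by get_config() when saving
model.compile(loss=HuberLoss(2.0), optimizer="nadam")
model.save("my_model_with_a_custom_loss_class.h5")
```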
---
## Activation function
```python=
def my_softplus(z):  # return value is just tf.nn.softplus(z)
    return tf.math.log(tf.exp(z) + 1.0)
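# usage example: keras.layers.Dense(30, activation=my_softplus)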
```
---
## Initializer
[tf.keras.initializers.Initializer](https://github.com/tensorflow/tensorflow/blob/v2.3.1/tensorflow/python/keras/initializers/initializers_v2.py#L30-L107)
```python=
def my_glorot_initializer(shape, dtype=tf.float32):
    stddev = tf.sqrt(2. / (shape[0] + shape[1]))
    return tf.random.normal(shape, stddev=stddev, dtype=dtype)
#keras.layers.Dense(100, kernel_initializer=my_glorot_initializer)

class my_init(keras.initializers.Initializer):
    def __init__(self, mean, stddev):
        self.mean = mean
        self.stddev = stddev
    # note that initializers override __call__, not call()
    def __call__(self, shape, dtype=tf.float32):
        return tf.random.normal(shape, mean=self.mean,
                                stddev=self.stddev, dtype=dtype)
    def get_config(self):
        return {'mean': self.mean, 'stddev': self.stddev}
#keras.layers.Dense(100, kernel_initializer=my_init(0, 0.1))
```
keras.initializers.Initializer defines no parent constructor (\_\_init__), and its get_config() does not return anything, so there is no need to call super() here.
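Because the work happens in \_\_call__(), the initializer can also be invoked directly to sanity-check the sampled weights (the shape here is an arbitrary assumption):
```python=
# draws a 2x3 weight tensor from N(0, 0.1)
weights = my_init(0, 0.1)((2, 3))
```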

---
## Regularizer
[keras.regularizers.Regularizer](https://github.com/tensorflow/tensorflow/blob/v2.3.1/tensorflow/python/keras/regularizers.py#L47-L208)
```python=
# function style
def my_l1_regularizer(weights):
    return tf.reduce_sum(tf.abs(0.01 * weights))
#keras.layers.Dense(30, activation='relu',
#                   kernel_regularizer=my_l1_regularizer)

# class style
class my_l1(keras.regularizers.Regularizer):
    def __init__(self, factor):
        self.factor = factor
    # regularizers are invoked through __call__, not call()
    def __call__(self, weights):
        return tf.reduce_sum(tf.abs(self.factor * weights))
    def get_config(self):
        return {'factor': self.factor}
#keras.layers.Dense(30, activation='relu',
#                   kernel_regularizer=my_l1(0.01))
```
keras.regularizers.Regularizer defines no parent constructor (\_\_init__), and its get_config() does not return anything, so there is no need to call super() here.
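A quick check that both versions agree (the toy weight tensor is an assumption):
```python=
w = tf.ones((2, 2))
my_l1_regularizer(w)  # 0.01 * 4 = 0.04
my_l1(0.01)(w)        # same value, via __call__
```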

---
## Constraint
[tf.keras.constraints.Constraint](https://github.com/tensorflow/tensorflow/blob/v2.3.1/tensorflow/python/keras/constraints.py)
```python=
def my_positive_weights(weights):  # return value is just tf.nn.relu(weights)
    return tf.where(weights < 0., tf.zeros_like(weights), weights)
#keras.layers.Dense(100, kernel_constraint=my_positive_weights)

class my_weights(keras.constraints.Constraint):
    # constraints are invoked through __call__, not call()
    def __call__(self, weights):
        return tf.where(weights < 0., tf.zeros_like(weights), weights)
    def get_config(self):
        return {}
#keras.layers.Dense(100, kernel_constraint=my_weights())
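# e.g. my_weights()(tf.constant([-1., 2.])) -> [0., 2.]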
```

---
## Metrics
[keras.metrics.Metric](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/Metric)
A metric is computed once per batch and returns the value aggregated over all batches so far; if that is not what you want, you can call reset_state() to reset these variables.
```python=
class HuberMetric(keras.metrics.Metric):
    def __init__(self, threshold=1.0, **kwargs):
        super().__init__(**kwargs)  # handles base args (e.g., dtype)
        self.threshold = threshold
        self.total = self.add_weight("total", initializer="zeros")
        self.count = self.add_weight("count", initializer="zeros")
    def huber_fn(self, y_true, y_pred):  # workaround
        error = y_true - y_pred
        is_small_error = tf.abs(error) < self.threshold
        squared_loss = tf.square(error) / 2
        linear_loss = self.threshold * tf.abs(error) - self.threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)
    def update_state(self, y_true, y_pred, sample_weight=None):
        metric = self.huber_fn(y_true, y_pred)
        self.total.assign_add(tf.reduce_sum(metric))
        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))
    def result(self):
        return self.total / self.count
    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "threshold": self.threshold}
```
- **add_weight()** creates the variables needed to track the metric's state across batches
- **update_state()** receives each batch's labels and predictions and updates those variables
- **result()** returns the final result
- **get_config()** saves the hyperparameters
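
A minimal usage sketch (the `model` variable, loss, and optimizer are assumptions):
```python=
# the streaming metric is reset at the start of each epoch and
# aggregated across the batches within it
model.compile(loss="mse", optimizer="nadam", metrics=[HuberMetric(2.0)])
```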

---
## Layer
[keras.layers.Layer](https://github.com/tensorflow/tensorflow/blob/v2.3.1/tensorflow/python/keras/engine/base_layer.py#L104-L3035)
```python=
class MyDense(keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)
    def build(self, batch_input_shape):
        self.kernel = self.add_weight(
            name="kernel", shape=[batch_input_shape[-1], self.units],
            initializer="glorot_normal")
        self.bias = self.add_weight(
            name="bias", shape=[self.units], initializer="zeros")
        super().build(batch_input_shape)  # must be at the end
    def call(self, X):
        return self.activation(X @ self.kernel + self.bias)
    def compute_output_shape(self, batch_input_shape):
        return tf.TensorShape(batch_input_shape.as_list()[:-1] + [self.units])
    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "units": self.units,
                "activation": keras.activations.serialize(self.activation)}
#MyDense(300)
```
- \*\*kwargs carries the parent class's parameters such as input_shape, trainable, and name
- keras.activations.get() converts the activation argument into the appropriate activation function (it accepts a function, a string such as 'relu' or 'selu', or **None**)
- build() calls add_weight() for each weight to create the layer's variables, using the input shape to size them (the number of neurons in the previous layer is usually the last dimension); it runs the first time the layer is used
- **build() must call the parent's build() at the end, so Keras knows the layer has been built.**
- call() applies the layer's computation
- compute_output_shape() returns the shape of the layer's output; tf.keras usually infers it automatically, so it can be omitted
- **get_config() saves the activation function's full configuration by calling keras.activations.serialize()**
<br>
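A quick usage sketch (input shape and layer sizes are arbitrary assumptions):
```python=
model = keras.models.Sequential([
    MyDense(30, activation="relu", input_shape=[8]),
    MyDense(1)
])
```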
For a layer with multiple inputs (e.g., Concatenate), the argument to call() must be a tuple containing all the inputs, and likewise the input_shape argument of compute_output_shape() must be a tuple of shapes.
Also, if the layer should behave differently during training and testing, call() needs a training argument, e.g. a layer that adds Gaussian noise only during training:
```python=
# a complete layer (the class wrapper is assumed) so the snippet runs
class MyGaussianNoise(keras.layers.Layer):
    def __init__(self, stddev, **kwargs):
        super().__init__(**kwargs)
        self.stddev = stddev
    def call(self, x, training=None):
        if training:
            noise = tf.random.normal(tf.shape(x), stddev=self.stddev)
            return x + noise
        else:
            return x
```
---
## Model
[keras.models.Model](https://github.com/tensorflow/tensorflow/blob/v2.3.1/tensorflow/python/keras/engine/training.py#L159-L2634)
Unless there is a reason not to, writing the model's layers separately from the Model itself is clearer and easier to read.
```python=
class ResidualBlock(keras.layers.Layer):
    def __init__(self, n_layers, n_neurons, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [keras.layers.Dense(n_neurons, activation="elu",
                                          kernel_initializer="he_normal")
                       for _ in range(n_layers)]
    def call(self, inputs):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        return inputs + Z

class ResidualRegressor(keras.models.Model):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden1 = keras.layers.Dense(30, activation="elu",
                                          kernel_initializer="he_normal")
        self.block1 = ResidualBlock(2, 30)
        self.block2 = ResidualBlock(2, 30)
        self.out = keras.layers.Dense(output_dim)
    def call(self, inputs):
        Z = self.hidden1(inputs)
        for _ in range(1 + 3):
            Z = self.block1(Z)
        Z = self.block2(Z)
        return self.out(Z)
#model = ResidualRegressor(1)
```
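A minimal training sketch (data names and optimizer are assumptions):
```python=
model = ResidualRegressor(1)
model.compile(loss="mse", optimizer="nadam")
# history = model.fit(X_train, y_train, epochs=5)
```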
---
## 重構損失(reconstruction loss)
```python=
class ReconstructingRegressor(keras.models.Model):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [keras.layers.Dense(30, activation="selu",
                                          kernel_initializer="lecun_normal")
                       for _ in range(5)]
        self.out = keras.layers.Dense(output_dim)
        self.reconstruction_mean = keras.metrics.Mean(name="reconstruction_error")
    def build(self, batch_input_shape):
        n_inputs = batch_input_shape[-1]
        self.reconstruct = keras.layers.Dense(n_inputs)
        super().build(batch_input_shape)
    def call(self, inputs, training=None):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        reconstruction = self.reconstruct(Z)
        recon_loss = tf.reduce_mean(tf.square(reconstruction - inputs))
        self.add_loss(0.05 * recon_loss)
        if training:
            result = self.reconstruction_mean(recon_loss)
            self.add_metric(result)
        return self.out(Z)
```
- Builds a DNN with five Dense layers plus one output layer
- build() creates an extra Dense layer used to reconstruct the model's inputs; it must be created here because its size depends on the number of inputs, which is unknown until build() is called
- call() computes the reconstruction loss, scales it by 0.05 so it does not dominate the main loss, and adds it to the model's list of losses via add_loss()
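
A quick sanity check (the toy input shape is an assumption): after one forward pass, the scaled reconstruction loss appears in model.losses:
```python=
model = ReconstructingRegressor(1)
X = tf.random.uniform((16, 8))
model(X)             # triggers build() and call()
print(model.losses)  # contains the 0.05-weighted reconstruction loss
```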
---
## References
1. Hands-On Machine Learning with Scikit-Learn, Keras & TensorFlow, Aurélien Géron
2. [TensorFlow GitHub](https://github.com/tensorflow/tensorflow)
3. [Keras docs](https://keras.io/api/)
4. [Why keras use "call" instead of \_\_call__?](https://stackoverflow.com/questions/57103604/why-keras-use-call-instead-of-call)
---