# 9章発表メモ 齋藤
## 9.2.2
$\mathbf{\mu}_k$について(9.14)式を微分して0とおき、最尤解を求める。
\begin{align}
\frac{\partial}{\partial \mathbf{\mu}_k} \ln p(\mathbf{X}|\mathbf{\pi}, \mathbf{\mu}, \mathbf{\Sigma})
&= \frac{\partial}{\partial \mathbf{\mu}_k} \sum_n \ln \{\sum_j \pi_j N(\mathbf{x}_n|\mathbf{\mu}_j, \mathbf{\Sigma}_j ) \} \tag{9.14} \\
&= \sum_n \frac{\pi_k N(\mathbf{x}_n|\mathbf{\mu}_k, \mathbf{\Sigma}_k)}{\sum_j \pi_j N(\mathbf{x}_n|\mathbf{\mu}_j, \mathbf{\Sigma}_j )}\Sigma_k^{-1}(\mathbf{x}_n - \mathbf{\mu}_k) & \\
&= \sum_n\gamma(z_{nk})\Sigma_k^{-1}(\mathbf{x}_n - \mathbf{\mu}_k) \\
&= 0 &(\because (9.16))
\end{align}
よって、
\begin{align}
&\sum_n \gamma(z_{nk})\Sigma_k^{-1}\mathbf{x}_n = \sum_n \gamma(z_{nk})\Sigma_k^{-1}\mathbf{\mu}_k \\
&\Leftrightarrow \sum_n \gamma(z_{nk})\mathbf{x}_n = \sum_n \gamma(z_{nk})\mathbf{\mu}_k \\
&\Leftrightarrow \sum_n \gamma(z_{nk})\mathbf{x}_n = N_k \mathbf{\mu}_k &(\because (9.18))\\
&\Leftrightarrow \mathbf{\mu}_k = \frac{1}{N_k} \sum_n \gamma(z_{nk})\mathbf{x}_n \tag{9.17}
\end{align}
同様に、$\mathbf{\Sigma}_k$について微分し、最尤解を求める。
$$\begin{aligned}
\frac{\partial}{\partial \mathbf{\Sigma}_{k}} \ln p(\mathbf{X} \mid \boldsymbol{\pi}, \boldsymbol{\mu}, \mathbf{\Sigma})
&=\frac{\partial}{\partial \mathbf{\Sigma}_{k}} \sum_{n} \ln \left\{\sum_{j} \pi_{j} \mathcal{N}\left(\mathbf{x}_{n} \mid \boldsymbol{\mu}_{j}, \mathbf{\Sigma}_{j}\right)\right\} \\
&=\sum_{n} \frac{1}{\sum_{j} \pi_{j} \mathcal{N}\left(\mathbf{x}_{n} \mid \boldsymbol{\mu}_{j}, \mathbf{\Sigma}_{j}\right)} \frac{\partial}{\partial \mathbf{\Sigma}_{k}}\left\{\sum_{j} \pi_{j} \mathcal{N}\left(\mathbf{x}_{n} \mid \boldsymbol{\mu}_{j}, \mathbf{\Sigma}_{j}\right)\right\} \\
&=\sum_{n} \frac{\pi_{k}}{\sum_{j} \pi_{j} \mathcal{N}\left(\mathbf{x}_{n} \mid \boldsymbol{\mu}_{j}, \mathbf{\Sigma}_{j}\right)} \frac{\partial}{\partial \mathbf{\Sigma}_{k}} \mathcal{N}\left(\mathbf{x}_{n} \mid \boldsymbol{\mu}_{k}, \mathbf{\Sigma}_{k}\right) \\
&=\sum_{n} \frac{\pi_{k} \mathcal{N}\left(\mathbf{x}_{n} \mid \boldsymbol{\mu}_{k}, \mathbf{\Sigma}_{k}\right)}{\sum_{j} \pi_{j} \mathcal{N}\left(\mathbf{x}_{n} \mid \boldsymbol{\mu}_{j}, \mathbf{\Sigma}_{j}\right)} \frac{\partial}{\partial \mathbf{\Sigma}_{k}} \ln \mathcal{N}\left(\mathbf{x}_{n} \mid \boldsymbol{\mu}_{k}, \mathbf{\Sigma}_{k}\right) \\
&=\sum_{n} \gamma(z_{nk}) \frac{\partial}{\partial \mathbf{\Sigma}_{k}}\left\{-\frac{D}{2} \ln (2 \pi)-\frac{1}{2} \ln \left|\mathbf{\Sigma}_{k}\right|-\frac{1}{2}\left(\mathbf{x}_{n}-\boldsymbol{\mu}_{k}\right)^{T} \mathbf{\Sigma}_{k}^{-1}\left(\mathbf{x}_{n}-\boldsymbol{\mu}_{k}\right)\right\} \\
&=\sum_{n} \gamma(z_{nk})\left\{-\frac{1}{2} \mathbf{\Sigma}_{k}^{-1}+\frac{1}{2} \mathbf{\Sigma}_{k}^{-1}\left(\mathbf{x}_{n}-\boldsymbol{\mu}_{k}\right)\left(\mathbf{x}_{n}-\boldsymbol{\mu}_{k}\right)^{T} \mathbf{\Sigma}_{k}^{-1}\right\} \\
&=0
\end{aligned}$$
ここで、$\mathbf{\Sigma}_k$が対称行列であることと、行列微分の公式 $\frac{\partial}{\partial \mathbf{A}} \ln |\mathbf{A}| = (\mathbf{A}^{-1})^T$、$\frac{\partial}{\partial \mathbf{A}} \mathbf{a}^T \mathbf{A}^{-1} \mathbf{a} = -(\mathbf{A}^{-1})^T \mathbf{a}\mathbf{a}^T (\mathbf{A}^{-1})^T$ を用いた。
よって、
\begin{align}
&\sum_n \gamma(z_{nk}) \mathbf{\Sigma}_k^{-1}(\mathbf{x}_n - \boldsymbol{\mu}_k)(\mathbf{x}_n - \boldsymbol{\mu}_k)^T\mathbf{\Sigma}_k^{-1} = \sum_n \gamma(z_{nk}) \mathbf{\Sigma}_k^{-1} \\
&\Leftrightarrow \sum_n \gamma(z_{nk}) (\mathbf{x}_n - \boldsymbol{\mu}_k)(\mathbf{x}_n - \boldsymbol{\mu}_k)^T = N_k \mathbf{\Sigma}_k &(\because \text{両辺に左右から}\mathbf{\Sigma}_k\text{を掛ける},\ (9.18)) \\
&\Leftrightarrow \mathbf{\Sigma}_k = \frac{1}{N_k}\sum_n \gamma(z_{nk}) (\mathbf{x}_n - \boldsymbol{\mu}_k)(\mathbf{x}_n - \boldsymbol{\mu}_k)^T \tag{9.19}
\end{align}
## 9.3.1
\begin{align}
p(\mathbf{Z}|\mathbf{X}, \mathbf{\mu}, \mathbf{\Sigma}, \mathbf{\pi}) &=
p(\mathbf{Z}|\mathbf{X})&(\because \text{D-separation}) \\
&\propto p(\mathbf{X}|\mathbf{Z})p(\mathbf{Z}) &(\because\text{Bayes' rule})\\
&= \prod_n\prod_k \pi_k^{z_{nk}} \prod_n\prod_k N(\mathbf{x}_n|\mathbf{\mu}_k, \mathbf{\Sigma}_k)^{z_{nk}} &(\because (9.10), (9.11)) \\
&= \prod_n\prod_k [\pi_k N(\mathbf{x}_n|\mathbf{\mu}_k, \mathbf{\Sigma}_k)]^{z_{nk}}
\end{align}
以上から、
\begin{align}
&p(\mathbf{z}_n|\mathbf{x}_n, \mathbf{\mu}, \mathbf{\Sigma}, \mathbf{\pi}) \propto \prod_k [\pi_k N(\mathbf{x}_n|\mathbf{\mu}_k, \mathbf{\Sigma}_k)]^{z_{nk}} \\
&\Rightarrow p(\mathbf{z}_n|\mathbf{x}_n, \mathbf{\mu}, \mathbf{\Sigma}, \mathbf{\pi}) = \frac{\prod_k[\pi_k N(\mathbf{x}_n|\mathbf{\mu}_k, \mathbf{\Sigma}_k)]^{z_{nk}}}{\sum_{\mathbf{z}_n} \prod_j [\pi_j N(\mathbf{x}_n|\mathbf{\mu}_j, \mathbf{\Sigma}_j)]^{z_{nj}}}
\end{align}
よって、$z_{nk}$の事後分布の期待値は、
\begin{align}
E[z_{nk}]&= \sum_{\mathbf{z}_{n}}z_{nk}\, p(\mathbf{z}_n|\mathbf{x}_n, \mathbf{\mu}, \mathbf{\Sigma}, \mathbf{\pi}) \\
&= \frac{ \sum_{\mathbf{z}_{n}}z_{nk} \prod_{k'} [\pi_{k'} N(\mathbf{x}_n|\mathbf{\mu}_{k'}, \mathbf{\Sigma}_{k'})]^{z_{nk'}}}{\sum_{\mathbf{z}_n} \prod_j [\pi_j N(\mathbf{x}_n|\mathbf{\mu}_j, \mathbf{\Sigma}_j)]^{z_{nj}}} \\
&=\frac{\pi_k N(\mathbf{x}_n|\mathbf{\mu}_k, \mathbf{\Sigma}_k )}{\sum_j \pi_j N(\mathbf{x}_n|\mathbf{\mu}_j, \mathbf{\Sigma}_j )} &(\because \text{1-of-K})\\
&= \gamma (z_{nk})\tag{9.39}
\end{align}
よって、(9.36)式の$z_{nk}$を$E[z_{nk}]$で置き換えると、
\begin{align}
E_{\mathbf{Z}}[\ln p(\mathbf{Z}, \mathbf{X}| \mathbf{\mu}, \mathbf{\Sigma}, \mathbf{\pi})] =
\sum_n \sum_k \gamma(z_{nk})\{\ln \pi_k + \ln N(\mathbf{x}_n|\mathbf{\mu}_k, \mathbf{\Sigma}_k ) \} \tag{9.40}
\end{align}