Bayes' Theorem
\(\boxed{\color{darkorange}{P(H|E)} = \dfrac{\color{green}{P(E|H)}\color{red}{P(H)}}{\color{blue}{P(E)}}}\)
\(\theta_d \sim \text{Dirichlet}(\alpha)\)
\(z_{d,n} | \theta_d \sim \text{Categorical}(K, \theta_d)\)
\(\beta_{k} \sim \text{Dirichlet}(\eta)\)
\(w_{d,n} | \beta_{1:K}, z_{d,n} \sim \text{Categorical}(V, \beta_{z_{d,n}})\)
\[ \tiny{
\color{brown}{p(\beta_{1:K}, \theta_{1:D}, z_{1:D}, w_{1:D})} =
\displaystyle \prod_{k=1}^K p(\beta_k)
\displaystyle \prod_{d=1}^D
\Bigg(
p(\theta_d)
\bigg(
\displaystyle \prod_{n=1}^N p(z_{d,n} | \theta_d) p(w_{d,n} | \beta_{1:K}, z_{d,n})
\bigg)
\Bigg)
}
\]
\[ \scriptsize{ \color{darkorange}{p(\beta_{1:K}, \theta_{1:D}, z_{1:D} | w_{1:D})} = \dfrac{\color{green}{p(w_{1:D} | \beta_{1:K}, \theta_{1:D}, z_{1:D})} \color{red}{p(\beta_{1:K}, \theta_{1:D}, z_{1:D})}}{\color{blue}{p(w_{1:D})}} } \]
Two categories of topic modeling algorithms:
\(\color{red}{z} \sim \text{Beta}(\alpha = 3, \beta = 3)\)
\(\color{green}{x_n|z} \sim \text{Bernoulli}(p = z) \quad \forall n = 1, \dots, N\)
# Plot the prior density p(z) of the latent variable z.
# NOTE(review): `np`, `scs` and `plt` are not defined in this snippet; presumably
# numpy, scipy.stats and matplotlib.pyplot imported elsewhere -- confirm.

# Evenly spaced grid of 250 points on [0, 1] at which the density is evaluated.
z = np.linspace(0, 1, 250)

# prior distribution: Beta[3, 3]
prior_a, prior_b = 3, 3
p_of_z = scs.beta(prior_a, prior_b).pdf(z)

plt.xlabel('z')
plt.ylabel('p(z)')
# Plot the density computed above (the original referenced `p_z`, which is never defined).
plt.plot(z, p_of_z)
plt.show()
# Simulate N Bernoulli observations whose success probability is itself random.
# NOTE(review): `scs` is not defined in this snippet; presumably scipy.stats -- confirm.
N = 30

# Hidden "true" success probability: one draw from the standard uniform distribution.
# size = 1 makes this a length-1 array, which broadcasts against size = N below.
true_prob = scs.uniform.rvs(size = 1)

# N draws from Bernoulli(true_prob).
x = scs.bernoulli.rvs(p = true_prob, size = N)
print("x =", x)
# output (one example run; no seed is set in this snippet, so values will differ):
# x = [0 0 0 1 0 0 0 1 0 0
#      0 0 0 0 0 0 0 0 1 0
#      1 1 0 0 0 1 0 0 1 0]
\(\scriptsize{ \color{darkorange}{p(z | \boldsymbol{x})} = \dfrac{ \color{green}{p(\boldsymbol{x} | z)} \color{red}{p(z)} }{ \color{blue}{p(\boldsymbol{x})} } }\)
\[ \scriptsize{ \begin{aligned} D_{KL}\Big( \color{yellow}{q(z)} \space || \space \color{darkorange}{p(z | \boldsymbol{x})} \Big) &= \mathbb{E}_{\color{yellow}{q}} \big[ \log \frac{\color{yellow}{q(z)}}{\color{darkorange}{p(z | \boldsymbol{x})}}\big] = \mathbb{E}_{\color{yellow}{q}}\Big[ \log \frac{\color{yellow}{q(z)} \color{blue}{p(\boldsymbol{x})}}{\color{green}{p(\boldsymbol{x}|z)} \color{red}{p(z)}} \Big] \\ &= \mathbb{E}_{\color{yellow}{q}}\Big[ \log \color{yellow}{q(z)} \Big] - \mathbb{E}_{\color{yellow}{q}} \Big[ \log \color{green}{p(\boldsymbol{x} | z)} \color{red}{p(z)} \Big] + \mathbb{E}_{\color{yellow}{q}} \Big[ \log \color{blue}{p(\boldsymbol{x})} \Big] \\ &= \mathbb{E}_{\color{yellow}{q}}\Big[ \log \color{yellow}{q(z)} \Big] - \mathbb{E}_{\color{yellow}{q}} \Big[ \log \color{brown}{p(\boldsymbol{x}, z)} \Big] + \log \color{blue}{p(\boldsymbol{x})} \end{aligned} } \]
\[ \scriptsize { \begin{aligned} \log \color{blue}{p(\boldsymbol{x})} &= D_{KL}\Big( \color{yellow}{q(z)} \space || \space \color{darkorange}{p(z | \boldsymbol{x})} \Big) - \mathbb{E}_{\color{yellow}{q}}\Big[ \log \color{yellow}{q(z)} \Big] + \mathbb{E}_{\color{yellow}{q}} \Big[ \log \color{brown}{p(\boldsymbol{x}, z)} \Big] \\ &\ge \mathbb{E}_{\color{yellow}{q}} \Big[ \log \color{brown}{p(\boldsymbol{x}, z)} \Big] - \mathbb{E}_{\color{yellow}{q}}\Big[ \log \color{yellow}{q(z)} \Big] = \mathcal{L}(\alpha_q, \beta_q) \end{aligned} } \]
\[ \boxed{ \mathcal{L}(\alpha_q, \beta_q) = \mathbb{E}_q \big[ \log p(\boldsymbol{x},z) \big] - \mathbb{E}_q \big[ \log q(z) \big] } \]