Restricted Boltzmann Machines
Sigmoid / binary
P(h_j = 1 | v) = sigmoid(b^h_j + \sum_{i} v_i w_{i,j})
P(v_i = 1 | h) = sigmoid(b^v_i + \sum_{j} h_j w_{i,j})
Energy
E(v,~h) =
- \sum_{i} b^v_i v_i
- \sum_{j} b^h_j h_j
- \sum_{i} \sum_{j} v_i h_j w_{i,j}
Probability of data v
P(v,~h) = \frac{1}{Z} e^{-E(v,~h)}
Z = \sum_{v'} \sum_{h'} e^{-E(v',~h')}
P(v)
= \frac{1}{Z} \sum_{h'} e^{-E(v,~h')}
= \frac{1}{Z} e^{-F(v)}
Free energy given data v
F(v)
=
- ln( \sum_{h'} e^{-E(v,~h')})
=
- \sum_{i} b^v_i v_i
- \sum_{j} softplus(b^h_j + \sum_{i} v_i w_{i,j})
\frac
{\partial \log P(v)}
{\partial w_{i,j}}
= ~\langle v_i h_j \rangle_{data}
- \langle v_i h_j \rangle_{model}
Update weight, visible and hidden bias
\nabla w_{i,j}
= \epsilon (
~\langle v_i h_j \rangle_{data}
- \langle v_i h_j \rangle_{model}
)
\nabla b^v_{i}
= \epsilon (
~\langle v_i \rangle_{data}
- \langle v_i \rangle_{model}
)
\nabla b^h_{j}
= \epsilon (
~\langle h_j \rangle_{data}
- \langle h_j \rangle_{model}
)
Softmax / categorical
P(v_{i_0,i_1} = 1~|~h)
=
\frac
{e^{b^v_{i_0,i_1} + \sum_{j} h_j w_{i_0,i_1,j}}}
{\sum_{i_1'} e^{b^v_{i_0,i_1'} + \sum_{j} h_j w_{i_0,i_1',j}}}
Energy
E(v,~h) =
- \sum_{i_0,i_1} b^v_{i_0,i_1} v_{i_0,i_1}
- \sum_{j} b^h_j h_j
- \sum_{i_0,i_1} \sum_{j} v_{i_0,i_1} h_j w_{i_0,i_1,j}
E(v,~h) =
- \sum_{i_0,i_1} b^v_{i_0,i_1} v_{i_0,i_1}
- \sum_{j_0,j_1} b^h_{j_0,j_1} h_{j_0,j_1}
- \sum_{i_0,i_1} \sum_{j_0,j_1} v_{i_0,i_1} h_{j_0,j_1} w_{i_0,i_1,j_0,j_1}
E(v,~h) =
\sum_{i} \frac{(v_i - b^v_i)^2}{2 \sigma^2_i}
- \sum_{j} b^h_j h_j
- \sum_{i} \sum_{j} \frac{v_i}{\sigma^2_i} h_j w_{i,j}
Gaussian / real value
P(v_i | h) = \mathcal{N}(v_i | b^v_i + \sum_{j} h_j w_{i,j}, \sigma^2_i)
Hybrid
P(h_j = 1~|~x,~y)
= sigmoid(b^h_j
+ \sum_{i^x} x_{i^x} w^{x,h}_{i^x,j}
+ \sum_{i_0^y, i_1^y} y_{i_0^y, i_1^y} w^{y_0, y_1, h}_{i_0^y,i_1^y,j})
Energy
E(x,~y,~h) =
- \sum_{j} b^h_j h_j
- \sum_{i^x} b^x_{i^x} x_{i^x}
- \sum_{i_0^y, i_1^y} b^y_{i_0^y, i_1^y} y_{i_0^y, i_1^y}
- \sum_{i^x,j} x_{i^x} h_j w^{x,h}_{i^x,j}
- \sum_{i_0^y, i_1^y, j} y_{i_0^y, i_1^y} h_j w^{y_0, y_1, h}_{i_0^y,i_1^y,j}
Probability and free energy of generative part
P(x,~y,~h) = \frac{1}{Z} e^{-E(x,~y,~h)}
P(x,~y)
= \frac{1}{Z} \sum_{h'} e^{-E(x,~y,~h')}
= \frac{1}{Z} e^{-F(x,~y)}
F(x,~y) =
- \sum_{i^x} b^x_{i^x} x_{i^x}
- \sum_{i_0^y, i_1^y} b^y_{i_0^y, i_1^y} y_{i_0^y, i_1^y}\\
- \sum_{j} softplus(b^h_j
+ \sum_{i^x} x_{i^x} w^{x,h}_{i^x,j}
+ \sum_{i_0^y, i_1^y} y_{i_0^y, i_1^y} w^{y_0, y_1, h}_{i_0^y,i_1^y,j})
Probability and free energy of categorical target
P(y_{i_0,i_1} = 1~|~h)
=
\frac
{e^{b^y_{i_0,i_1} + \sum_{j} h_j w^{y_0, y_1, h}_{i_0,i_1,j}}}
{\sum_{i_1'} e^{b^y_{i_0,i_1'} + \sum_{j} h_j w^{y_0, y_1, h}_{i_0,i_1',j}}}
P(y_{i_0,i_1} = 1~|~x)
=
\frac
{\sum_{h'} e^{-E(x,~y,~h')}}
{\sum_{y'} \sum_{h'} e^{-E(x,~y',~h')}} \\
=
\frac
{
e^{
b^y_{i_0,i_1}
+ \sum_{j} softplus(
b^h_j
+ \sum_{i^x} x_{i^x} w^{x,h}_{i^x,j}
+ w^{y_0, y_1, h}_{i_0,i_1,j}
)
}
}
{
\sum_{i_1'} e^{
b^y_{i_0,i_1'}
+ \sum_{j} softplus(
b^h_j
+ \sum_{i^x} x_{i^x} w^{x,h}_{i^x,j}
+ w^{y_0, y_1, h}_{i_0,i_1',j}
)
}
} \\
=
\frac
{e^{-F(y_{i_0,i_1}|x)}}
{\sum_{i_1^{'}} e^{-F(y_{i_0, i_1^{'}}|x)}}
F(y_{i_0, i_1}|x)
=
- b^y_{i_0,i_1}
- \sum_{j} softplus(
b^h_j
+ \sum_{i^x} x_{i^x} w^{x,h}_{i^x,j}
+ w^{y_0, y_1, h}_{i_0,i_1,j}
)