Restricted Boltzmann Machines

Sigmoid / binary

P(h_j = 1 | v) = sigmoid(b^h_j + \sum_{i} v_i w_{i,j})
P(v_i = 1 | h) = sigmoid(b^v_i + \sum_{j} h_j w_{i,j})

Energy

E(v,~h) = 
- \sum_{i} b^v_i v_i
- \sum_{j} b^h_j h_j
- \sum_{i} \sum_{j} v_i h_j w_{i,j}

Probability of data v

P(v,~h) = \frac{1}{Z} e^{-E(v,~h)}
Z = \sum_{v'} \sum_{h'} e^{-E(v',~h')}
P(v)
= \frac{1}{Z} \sum_{h'} e^{-E(v,~h')}
= \frac{1}{Z} e^{-F(v)}

Free energy given data v

F(v)
=
- ln( \sum_{h'} e^{-E(v,~h')})
=
- \sum_{i} b^v_i v_i
- \sum_{j} softplus(b^h_j + \sum_{i} v_i w_{i,j})
\frac
{\partial \log P(v)}
{\partial w_{i,j}}
= ~\langle v_i h_j \rangle_{data}
- \langle v_i h_j \rangle_{model}

Update weight, visible and hidden bias

\Delta w_{i,j}
= \epsilon (
~\langle v_i h_j \rangle_{data}
- \langle v_i h_j \rangle_{model}
)
\Delta b^v_{i}
= \epsilon (
~\langle v_i \rangle_{data}
- \langle v_i \rangle_{model}
)
\Delta b^h_{j}
= \epsilon (
~\langle h_j \rangle_{data}
- \langle h_j \rangle_{model}
)

Softmax / categorical

P(v_{i_0,i_1} = 1~|~h)
=
\frac
{e^{b^v_{i_0,i_1} + \sum_{j} h_j w_{i_0,i_1,j}}}
{\sum_{i_1'} e^{b^v_{i_0,i_1'} + \sum_{j} h_j w_{i_0,i_1',j}}}

Energy

E(v,~h) = 
- \sum_{i_0,i_1} b^v_{i_0,i_1} v_{i_0,i_1}
- \sum_{j} b^h_j h_j
- \sum_{i_0,i_1} \sum_{j} v_{i_0,i_1} h_j w_{i_0,i_1,j}
E(v,~h) = 
- \sum_{i_0,i_1} b^v_{i_0,i_1} v_{i_0,i_1}
- \sum_{j_0,j_1} b^h_{j_0,j_1} h_{j_0,j_1}
- \sum_{i_0,i_1} \sum_{j_0,j_1} v_{i_0,i_1} h_{j_0,j_1} w_{i_0,i_1,j_0,j_1}
E(v,~h) = 
\sum_{i} \frac{(v_i - b^v_i)^2}{2 \sigma^2_i}
- \sum_{j} b^h_j h_j
- \sum_{i} \sum_{j} \frac{v_i}{\sigma^2_i} h_j w_{i,j}

Gaussian / real value

P(v_i | h) = \mathcal{N}(v_i | b^v_i + \sum_{j} h_j w_{i,j}, \sigma^2_i)

Hybrid

P(h_j = 1~|~x,~y)
= sigmoid(b^h_j
+ \sum_{i^x} x_{i^x} w^{x,h}_{i^x,j}
+ \sum_{i_0^y, i_1^y} y_{i_0^y, i_1^y} w^{y_0, y_1, h}_{i_0^y,i_1^y,j})

Energy

E(x,~y,~h) =
- \sum_{j} b^h_j h_j
- \sum_{i^x} b^x_{i^x} x_{i^x}
- \sum_{i_0^y, i_1^y} b^y_{i_0^y, i_1^y} y_{i_0^y, i_1^y}
- \sum_{i^x,j} x_{i^x} h_j w^{x,h}_{i^x,j}
- \sum_{i_0^y, i_1^y, j} y_{i_0^y, i_1^y} h_j w^{y_0, y_1, h}_{i_0^y,i_1^y,j}

Probability and free energy of generative part

P(x,~y,~h) = \frac{1}{Z} e^{-E(x,~y,~h)}
P(x,~y)
= \frac{1}{Z} \sum_{h'} e^{-E(x,~y,~h')}
= \frac{1}{Z} e^{-F(x,~y)}
F(x,~y) =
- \sum_{i^x} b^x_{i^x} x_{i^x}
- \sum_{i_0^y, i_1^y} b^y_{i_0^y, i_1^y} y_{i_0^y, i_1^y}\\
- \sum_{j} softplus(b^h_j
+ \sum_{i^x} x_{i^x} w^{x,h}_{i^x,j}
+ \sum_{i_0^y, i_1^y} y_{i_0^y, i_1^y} w^{y_0, y_1, h}_{i_0^y,i_1^y,j})

Probability and free energy of categorical target

P(y_{i_0,i_1} = 1~|~h)
=
\frac
{e^{b^y_{i_0,i_1} + \sum_{j} h_j w^{y_0, y_1, h}_{i_0,i_1,j}}}
{\sum_{i_1'} e^{b^y_{i_0,i_1'} + \sum_{j} h_j w^{y_0, y_1, h}_{i_0,i_1',j}}}
	P(y_{i_0,i_1} = 1~|~x)
	=
	\frac
	{\sum_{h'} e^{-E(x,~y,~h')}}
	{\sum_{y'} \sum_{h'} e^{-E(x,~y',~h')}} \\
	=
	\frac
	{
            e^{
                b^y_{i_0,i_1}
        	+ \sum_{j} softplus(
        	    b^h_j
                    + \sum_{i^x} x_{i^x} w^{x,h}_{i^x,j}
                    + w^{y_0, y_1, h}_{i_0,i_1,j}
                )
            }
        }
	{
    	    \sum_{i_1'} e^{
                b^y_{i_0,i_1'}
        	+ \sum_{j} softplus(
        	    b^h_j
                    + \sum_{i^x} x_{i^x} w^{x,h}_{i^x,j}
                    + w^{y_0, y_1, h}_{i_0,i_1',j}
                )
            }
        } \\ 
	=
	\frac
	{e^{-F(y_{i_0,i_1}|x)}}
	{\sum_{i_1^{'}} e^{-F(y_{i_0, i_1^{'}}|x)}}
F(y_{i_0, i_1}|x)
=
      - b^y_{i_0,i_1}
- \sum_{j} softplus(
    b^h_j
          + \sum_{i^x} x_{i^x} w^{x,h}_{i^x,j}
          + w^{y_0, y_1, h}_{i_0,i_1,j}
      )