谱聚类：原理 + python 实现

2. 问题描述

$$w_{ij}=w_{ji}= \begin{cases} 0& {x_i \notin N_j \;and \;x_j \notin N_i}\\ exp(-\dfrac{\|x_i-x_j\|_2^2}{2\sigma^2})& {x_i \in N_j\; or \; x_j \in N_i} \end{cases}$$

$$w_{ij}=w_{ji}= \begin{cases} 0& {x_i \notin N_j \;or \;x_j \notin N_i}\\ exp(-\dfrac{\|x_i-x_j\|_2^2}{2\sigma^2})& {x_i \in N_j\; and\; x_j \in N_i} \end{cases}$$

$$W(A,B) = \sum_{i\in A,j \in B}w_{ij}$$

$$Ncut(V) = \sum_{i=1}^k\frac{w(A_k,\overline{A_k})}{D(A_k)}$$

$$D(A_k) = \sum_{i\in A_k}d_i =\sum_{i,j\in A_k} w_{ij}$$

$$D = \left[ \begin{array}{ccc} d_1 & & &\\ &d_2 & &\\ & &\ddots &\\ & & &d_k\\ \end{array} \right]$$

$$y_i\quad s.t.\quad y_i\in \{0,1\}^k, \sum_{j=1}^ky_{ij} = 1$$

$$\hat{Y} = \mathop{\arg\min}\limits_{Y} Ncut(V)$$

3. 优化问题求解

\begin{aligned} Ncut(V) &= \sum_{i=1}^k\frac{W(A_i,\overline{A_i})}{\sum_{i\in A_i} d_i} \\ &= tr \left[ \begin{smallmatrix} \dfrac{W(A_1,\overline{A_1})}{\sum_{i\in A_1} d_i} & & &\\ & \dfrac{W(A_2,\overline{A_2})}{\sum_{i\in A_2} d_i}& & \\ & & \ddots & \\ & & &\dfrac{W(A_k,\overline{A_k})}{\sum_{i\in A_k} d_i}\\ \end{smallmatrix} \right] \\ &= tr \left[ \begin{smallmatrix} W(A_1,\overline{A_1})& & & \\ & W(A_2,\overline{A_2})& & \\ & & \ddots & \\ & & & W(A_k,\overline{A_k})\\ \end{smallmatrix} \right] \cdot \left[ \begin{smallmatrix} \sum_{i\in A_1} d_i& & & \\ & \sum_{i\in A_2} d_i& & \\ & & \ddots & \\ & & & \sum_{i\in A_k} d_i\\ \end{smallmatrix} \right]^{-1} \\ &= tr(O_{k\times k}\cdot P_{k\times k}^{-1})\\\end{aligned}

$$\mathop{\arg\min}\limits_{Y} tr(O\cdot P^{-1})$$

3.1 矩阵 P 的表示

\begin{aligned} Y\cdot Y^T_{k \times k} = \left( \begin{array}{ccc} y_1& y_2 & \cdots & y_n\\ \end{array} \right) \cdot \left( \begin{array}{ccc} y_1^T\\ \vdots\\ y_n^T\\ \end{array} \right) = \sum_{i=1}^ny_i\cdot y_i^T \end{aligned}

$$P = Y^T D Y$$

3.2 矩阵 O 的表示

$$W(A_k,\overline{A_k}) = W(A_k,V) – W(A_k,A_k) = \sum_{i\in A_k}d_i – \sum_{i,j\in A_k}w_{ij}$$ \begin{aligned} O =& \left[ \begin{smallmatrix} W(A_1,\overline{A_1})& & & \\ & W(A_2,\overline{A_2})& & \\ & & \ddots & \\ & & & W(A_k,\overline{A_k})\\ \end{smallmatrix} \right]\\ =& \left[ \begin{smallmatrix} \sum_{i\in A_1} d_i& & & \\ & \sum_{i\in A_2} d_i& & \\ & & \ddots & \\ & & & \sum_{i\in A_k} d_i\\ \end{smallmatrix} \right] – \left[ \begin{smallmatrix} W(A_1,A_1)& & & \\ & W(A_2,A_2)& & \\ & & \ddots & \\ & & & W(A_k,A_k)\\ \end{smallmatrix} \right] \\ =& Y^T D Y – Q \end{aligned}

\begin{aligned} Y^T_{k\times n} W_{n \times n} Y_{n \times k} &= \left( \begin{array}{ccc} y_1& y_2 & \cdots & y_n\\ \end{array} \right) \cdot \left( \begin{array}{ccc} W_{11}&\cdots&W_{n1}\\ \vdots&\ddots&\vdots\\ W_{1n}&\cdots&W_{nn}\\ \end{array} \right) \cdot \left( \begin{array}{ccc} y_1^T\\ \vdots\\ y_n^T\\ \end{array} \right)\\ &= \left( \begin{array}{ccc} \sum_{i=1}^n y_iw_{i1}&\cdots & \sum_{i=1}^n y_iw_{in}\\ \end{array} \right) \cdot \left( \begin{array}{ccc} y_1^T\\ \vdots\\ y_n^T\\ \end{array} \right)\\ &= \sum_{i=1}^n \sum_{j=1}^n y_iy_j^Tw_{ij} \end{aligned}

\begin{aligned} Y^T_{k\times n} W_{n \times n} Y_{n \times k} &= \left( \begin{array}{ccc} \sum_{i\in A_1}\sum_{j\in A_1}w_{ij}&\cdots&\sum_{i\in A_1}\sum_{j\in A_k}w_{ij}\\ \vdots&\ddots&\vdots\\ \sum_{i\in A_k}\sum_{j\in A_1}w_{ij}&\cdots&\sum_{i\in A_k}\sum_{j\in A_k}w_{ij}\\ \end{array} \right) \end{aligned}

3.3 优化问题的解

$$\hat{Y} = \mathop{\arg\min}\limits_{Y} tr(O\cdot P^{-1}) = \mathop{\arg\min}\limits_{Y} tr(\frac{Y^T(D-W)Y}{Y^TDY})$$

1. 由于 $D,W$ 均为对称矩阵，因此 $L$ 也是对称矩阵，其特征值为实数
2. 对于任意的向量 $f$，有：\begin{aligned} f^TLf &= f^TDf – f^TWf = \sum_{i=1}^nd_if_i^2 – \sum_{i=1}^n\sum_{j=1}^nw_{ij}f_if_j\\ &= \frac{1}{2}(\sum d_if_i^2 – 2 \sum_{i=1}^n\sum_{j=1}^nw_{ij}f_if_j + \sum d_jf_j^2)\\ &= \frac{1}{2}\sum_{i=1}^n\sum_{j=1}^nw_{ij}(f_i-f_j)^2 \ge 0 \end{aligned}因此 $L$ 半正定，其特征值均 $\ge$ 0

$$\hat{Y} =\mathop{\arg\min}\limits_{Y} tr(\frac{Y^TLY}{Y^TDY})$$

$$\mathop{\arg\min}\limits_{Z} tr(\frac{Z^TD^{-\frac{1}{2}}LD^{-\frac{1}{2}}Z}{Z^TZ})$$

$$\mathop{\arg\min}\limits_{Z} \frac{z_i^TD^{-\frac{1}{2}}LD^{-\frac{1}{2}}z_i}{z_i^Tz_i}$$

$$\mathop{\arg\min}\limits_{Z} z_i^TD^{-\frac{1}{2}}LD^{-\frac{1}{2}}z_i,\qquad s.t.\quad z_i^Tz_i = \textbf{1}$$

\begin{aligned} L(z_i) &= z_i^TD^{-\frac{1}{2}}LD^{-\frac{1}{2}}z_i – \lambda(z_i^Tz_i – 1) \\ \frac{\partial L(z_i)}{\partial z_i} &= D^{-\frac{1}{2}}LD^{-\frac{1}{2}}z_i – \lambda z_i = 0 \\ \Rightarrow \lambda \cdot z_i &= D^{-\frac{1}{2}}LD^{-\frac{1}{2}}z_i \end{aligned}

[mathjax]