109109A_{11} & \cdots & A_{1n} \\ \vdots & \ddots & \vdots \\ A_{m1} & \cdots & A_{mn} \
110110\end{bmatrix}$" } :
111111
112- <Collapse label = " $$
113- \nabla_x (A x) = A^T
114- $$" >
115-
116- $$
117- \begin{split}
118- &\ \nabla_x (A x) = \nabla_x (A_1 x_1 + A_2 x_2 + \cdots + A_n x_n) \\
119- =&\ \nabla_x ([A_{11} \cdots A_{1m}] x_1 + [A_{21} \cdots A_{2m}] x_2 + \cdots + [A_{n1} \cdots A_{nm}] x_n) \\
120- =&\ \nabla_x (A_1 x_1 + A_2 x_2 + \cdots + A_n x_n) \\
121- =&\ \nabla_x (A_1 x_1) + \nabla_x (A_2 x_2) + \cdots + \nabla_x (A_n x_n) \\
122- =&\ \begin{bmatrix}
123- \frac{\partial}{\partial x_1} (A_1 x_1) , \frac{\partial}{\partial x_2} (A_2 x_2) , \cdots , \frac{\partial}{\partial x_n} (A_n x_n)
124- \end{bmatrix} \\
125- =&\ \begin{bmatrix}
126- A_1 , A_2 , \cdots , A_n
127- \end{bmatrix} = A^T
128- \end{split}
129- $$
130-
131- </Collapse >
112+ <Collapse label = " $$
113+ \nabla_x (A x) = A^T
114+ $$" >
115+
116+ $$
117+ \begin{split}
118+ &\ \nabla_x (A x) = \nabla_x (A_1 x_1 + A_2 x_2 + \cdots + A_n x_n) \\
119+ =&\ \nabla_x ([A_{11} \cdots A_{m1}]^T x_1 + [A_{12} \cdots A_{m2}]^T x_2 + \cdots + [A_{1n} \cdots A_{mn}]^T x_n) \\
120+ =&\ \nabla_x (A_1 x_1 + A_2 x_2 + \cdots + A_n x_n) \\
121+ =&\ \nabla_x (A_1 x_1) + \nabla_x (A_2 x_2) + \cdots + \nabla_x (A_n x_n) \\
122+ =&\ \begin{bmatrix}
123+ \frac{\partial}{\partial x_1} (A_1 x_1) , \frac{\partial}{\partial x_2} (A_2 x_2) , \cdots , \frac{\partial}{\partial x_n} (A_n x_n)
124+ \end{bmatrix} \\
125+ =&\ \begin{bmatrix}
126+ A_1^T , A_2^T , \cdots , A_n^T
127+ \end{bmatrix} = A^T
128+ \end{split}
129+ $$
130+
131+ </Collapse >
132132
133133 - 对于所有的 :ctip[ $\mathbf{ x } \in R^n$] { id = " $\mathbf{x} = [x_1, x_2, \cdots, x_n]^T$" } \
134134 和 :ctip[ $A \in R^{ n \times m } $] { id = " $\mathbf{A} = \begin{bmatrix} \
135135A_{11} & \cdots & A_{1m} \\ \vdots & \ddots & \vdots \\ A_{n1} & \cdots & A_{nm} \
136136\end{bmatrix}$" } :
137137
138- <Collapse label = " $$
139- \nabla_x (x^T A) = A
140- $$" >
141-
142- $$
143- \begin{split}
144- &\ \nabla_x (x^T A) = \nabla_x (x_1 A_1 + x_2 A_2 + \cdots + x_n A_n) \\
145- =&\ \nabla_x (x_1 [A_{11} \cdots A_{1m}] + x_2 [A_{21} \cdots A_{2m}] + \cdots + x_n [A_{n1} \cdots A_{nm}]) \\
146- =&\ \nabla_x (x_1 A_1 + x_2 A_2 + \cdots + x_n A_n) \\
147- =&\ \nabla_x (x_1 A_1) + \nabla_x (x_2 A_2) + \cdots + \nabla_x (x_n A_n) \\
148- =&\ \begin{bmatrix} \
149- \frac{\partial}{\partial x_1} (x_1 A_1) , \frac{\partial}{\partial x_2} (x_2 A_2) , \cdots , \frac{\partial}{\partial x_n} (x_n A_n)
150- \end{bmatrix} \\
151- =&\ \begin{bmatrix}
152- A_1 , A_2 , \cdots , A_n
153- \end{bmatrix} = A
154- \end{split}
155- $$
156-
157- </Collapse >
138+ <Collapse label = " $$
139+ \nabla_x (x^T A) = A
140+ $$" >
141+
142+ $$
143+ \begin{split}
144+ &\ \nabla_x (x^T A) = \nabla_x (x_1 A_1 + x_2 A_2 + \cdots + x_n A_n) \\
145+ =&\ \nabla_x (x_1 [A_{11} \cdots A_{1m}] + x_2 [A_{21} \cdots A_{2m}] + \cdots + x_n [A_{n1} \cdots A_{nm}]) \\
146+ =&\ \nabla_x (x_1 A_1 + x_2 A_2 + \cdots + x_n A_n) \\
147+ =&\ \nabla_x (x_1 A_1) + \nabla_x (x_2 A_2) + \cdots + \nabla_x (x_n A_n) \\
148+ =&\ \begin{bmatrix}
149+ \frac{\partial}{\partial x_1} (x_1 A_1) , \frac{\partial}{\partial x_2} (x_2 A_2) , \cdots , \frac{\partial}{\partial x_n} (x_n A_n)
150+ \end{bmatrix} \\
151+ =&\ \begin{bmatrix}
152+ A_1 , A_2 , \cdots , A_n
153+ \end{bmatrix} = A
154+ \end{split}
155+ $$
156+
157+ </Collapse >
158158
159159- **二次型** (二次型是二次函数在向量空间中的推广):
160160
@@ -163,23 +163,23 @@ A_{11} & \cdots & A_{1m} \\ \vdots & \ddots & \vdots \\ A_{n1} & \cdots & A_{nm}
163163A_{11} & \cdots & A_{1n} \\ \vdots & \ddots & \vdots \\ A_{n1} & \cdots & A_{nn} \
164164\end{bmatrix}$" } :
165165
166- <Collapse label = " $$
167- \nabla_x x^T A x = (A + A^T) x
168- $$" >
166+ <Collapse label = " $$
167+ \nabla_x x^T A x = (A + A^T) x
168+ $$" >
169169
170- $$
171- \begin{split}
172- &\ \nabla_x x^T A x = \nabla_x \sum_{i=1}^n \sum_{j=1}^n x_i A_{ij} x_j \\
173- =&\ \frac{\partial}{\partial x_k} \sum_{i=1}^n \sum_{j=1}^n x_i A_{ij} x_j + \frac{\partial}{\partial x_k} \sum_{i=1}^n \sum_{j=1}^n x_j A_{ji} x_i \\
174- =&\ \sum_{i=1}^n \sum_{j=1}^n A_{ij} x_j + \sum_{i=1}^n \sum_{j=1}^n A_{ji} x_i \\
175- =&\ \sum_{j=1}^n A_{kj} x_j + \sum_{i=1}^n A_{ik} x_i \text{($i, j = k$ 时,$A_{kj} x_j, A_{ik} x_i$ 分别存在一项 $A_{kk} x_k$)} \\
176- =&\ \sum_{i=1}^n (\sum_{j=1}^n A_{ij} x_j) \cdot e_i + \sum_{j=1}^n (\sum_{i=1}^n A_{ji} x_i) \cdot e_j \\
177- =&\ \sum_{i=1}^n (\mathbf{A} \mathbf{x})_i \cdot e_i + \sum_{j=1}^n (\mathbf{A^T} \mathbf{x})_j \cdot e_j \\ \
178- =&\ (A + A^T) x
179- \end{split}
180- $$
170+ $$
171+ \begin{split}
172+ &\ \nabla_x x^T A x = \nabla_x \sum_{i=1}^n \sum_{j=1}^n x_i A_{ij} x_j \\
173+ =&\ \sum_{k=1}^n \left( \frac{\partial}{\partial x_k} \sum_{i=1}^n \sum_{j=1}^n x_i A_{ij} x_j \right) e_k \\
174+ =&\ \sum_{k=1}^n \left( \sum_{i=1}^n \sum_{j=1}^n \frac{\partial x_i}{\partial x_k} A_{ij} x_j + \sum_{i=1}^n \sum_{j=1}^n x_i A_{ij} \frac{\partial x_j}{\partial x_k} \right) e_k \\
175+ =&\ \sum_{k=1}^n \left( \sum_{j=1}^n A_{kj} x_j + \sum_{i=1}^n A_{ik} x_i \right) e_k \text{(仅 $i = k$ 或 $j = k$ 的项导数非零;$i = j = k$ 时,$A_{kj} x_j, A_{ik} x_i$ 分别存在一项 $A_{kk} x_k$)} \\
176+ =&\ \sum_{k=1}^n (\sum_{j=1}^n A_{kj} x_j) \cdot e_k + \sum_{k=1}^n (\sum_{i=1}^n A_{ik} x_i) \cdot e_k \\
177+ =&\ \sum_{k=1}^n (\mathbf{A} \mathbf{x})_k \cdot e_k + \sum_{k=1}^n (\mathbf{A^T} \mathbf{x})_k \cdot e_k \\
178+ =&\ (A + A^T) x
179+ \end{split}
180+ $$
181181
182- </Collapse >
182+ </Collapse >
183183
184184- **范数** :
185185
@@ -195,19 +195,19 @@ A_{11} & \cdots & A_{1n} \\ \vdots & \ddots & \vdots \\ A_{n1} & \cdots & A_{nn}
195195A_{11} & \cdots & A_{1n} \\ \vdots & \ddots & \vdots \\ A_{n1} & \cdots & A_{nn} \
196196\end{bmatrix}$" } :
197197
198- <Collapse label = " $$
199- \nabla_x \|x\|_2^2 = \nabla_x (x^T x) = 2x
200- $$" >
198+ <Collapse label = " $$
199+ \nabla_x \|x\|_2^2 = \nabla_x (x^T x) = 2x
200+ $$" >
201201
202- $$
203- \begin{split}
204- &\ \nabla_x \|x\|_2 = \nabla_x (\sqrt{x^T x}) ^ 2 \\
205- =&\ \nabla_x (x^T x) = \nabla_x (x_1^2 + x_2^2 + \cdots + x_n^2) \\
206- =&\ 2x
207- \end{split}
208- $$
202+ $$
203+ \begin{split}
204+ &\ \nabla_x \|x\|_2^2 = \nabla_x (\sqrt{x^T x}) ^ 2 \\
205+ =&\ \nabla_x (x^T x) = \nabla_x (x_1^2 + x_2^2 + \cdots + x_n^2) \\
206+ =&\ 2x
207+ \end{split}
208+ $$
209209
210- </Collapse >
210+ </Collapse >
211211
212212:::nerd
213213在深度学习中每层神经网络之间由 **_权重矩阵_(通常还会添加同维度的偏置向量)桥接不同维度矩阵的计算**。随后再通过:term[激活函数]{./terms/dl#activation-function}将计算结果映射到非线性空间,是:term[神经元]{./terms/dl#neuron}的计算核心。
0 commit comments