Functional Automatic Differentiation
Environment Setup
# The experiment environment has mindspore==2.2.14 preinstalled; to switch to another MindSpore version, change the version number in the command below
!pip uninstall mindspore -y
!pip install -i https://pypi.mirrors.ustc.edu.cn/simple mindspore==2.2.14
import numpy as np
import mindspore
from mindspore import nn
from mindspore import ops
from mindspore import Tensor, Parameter
Functions and Computational Graphs
- Computation flow: $wx + b = z \rightarrow \mathrm{Activation\ Function}(z) \rightarrow y_{pred} \rightarrow \mathrm{Cross\ Entropy}(y, y_{pred})$
- $w$, $b$ are the parameters to be optimized.
x = ops.ones(5, mindspore.float32)  # input tensor
y = ops.zeros(3, mindspore.float32)  # expected output
w = Parameter(Tensor(np.random.randn(5, 3), mindspore.float32), name='w')  # weight
b = Parameter(Tensor(np.random.randn(3,), mindspore.float32), name='b')  # bias

def function(x, y, w, b):
    z = ops.matmul(x, w) + b
    loss = ops.binary_cross_entropy_with_logits(z, y, ops.ones_like(z), ops.ones_like(z))
    return loss

loss = function(x, y, w, b)
print(loss)
# Output: Tensor(shape=[], dtype=Float32, value= 0.914285)
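The activation function from the graph above is folded into the loss call: with unit `weight` and `pos_weight`, `binary_cross_entropy_with_logits` is equivalent to a sigmoid activation followed by binary cross entropy. A minimal sketch of that equivalence (illustrative, not part of the original example):
# Illustrative check: sigmoid activation followed by binary cross entropy
# should reproduce the loss printed above (up to floating-point error).
probs = ops.sigmoid(ops.matmul(x, w) + b)                       # Activation Function(z)
bce = ops.binary_cross_entropy(probs, y, ops.ones_like(probs))  # Cross Entropy(y, y_pred)
print(bce)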
Differentiation Functions and Gradient Computation
- To optimize the model, we need the derivatives of the loss with respect to the parameters: $\frac{\partial loss}{\partial w}$ and $\frac{\partial loss}{\partial b}$.
- Call `mindspore.grad` to obtain the differentiation function of `function`:
  - `fn`: the function to differentiate
  - `grad_position`: index (or indices) of the inputs to differentiate with respect to
- Obtaining a differentiation function with `grad` is a function transformation: the input is a function, and the output is also a function.
grad_fn = mindspore.grad(function, (2, 3))
grads = grad_fn(x, y, w, b)
print(grads)
# Output:
# (Tensor(shape=[5, 3], dtype=Float32, value=
#  [[ 6.56869709e-02, 5.37334494e-02, 3.01467031e-01],
#   [ 6.56869709e-02, 5.37334494e-02, 3.01467031e-01],
#   [ 6.56869709e-02, 5.37334494e-02, 3.01467031e-01],
#   [ 6.56869709e-02, 5.37334494e-02, 3.01467031e-01],
#   [ 6.56869709e-02, 5.37334494e-02, 3.01467031e-01]]),
#  Tensor(shape=[3], dtype=Float32, value=
#  [ 6.56869709e-02, 5.37334494e-02, 3.01467031e-01]))
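`grad_position` also accepts a single index; the sketch below (illustrative) differentiates only with respect to `w`:
# Illustrative sketch: a single grad_position index yields only d(loss)/d(w).
grad_w_fn = mindspore.grad(function, 2)
grad_w = grad_w_fn(x, y, w, b)
print(grad_w.shape)  # (5, 3), same shape as w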
Stop Gradient
- Truncates the gradient at a given output item, or eliminates a Tensor's influence on the gradient.
def function_with_logits(x, y, w, b):
    z = ops.matmul(x, w) + b
    loss = ops.binary_cross_entropy_with_logits(z, y, ops.ones_like(z), ops.ones_like(z))
    return loss, z
grad_fn = mindspore.grad(function_with_logits, (2, 3))
grads = grad_fn(x, y, w, b)
# To block z's effect on the gradient, use the ops.stop_gradient interface to truncate the gradient here
def function_stop_gradient(x, y, w, b):
    z = ops.matmul(x, w) + b
    loss = ops.binary_cross_entropy_with_logits(z, y, ops.ones_like(z), ops.ones_like(z))
    return loss, ops.stop_gradient(z)

grad_fn = mindspore.grad(function_stop_gradient, (2, 3))
grads = grad_fn(x, y, w, b)
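Because `z` is detached, these gradients should coincide with those of the original `function`, which returned only the loss. A minimal comparison sketch (illustrative):
# Illustrative check: gradients with stop_gradient(z) should equal the
# gradients of the original function that returned only the loss.
grads_plain = mindspore.grad(function, (2, 3))(x, y, w, b)
print(np.allclose(grads[0].asnumpy(), grads_plain[0].asnumpy()))  # expected: True
print(np.allclose(grads[1].asnumpy(), grads_plain[1].asnumpy()))  # expected: True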
Auxiliary data
- Auxiliary data are a function's outputs other than the first one.
- `grad` and `value_and_grad` provide a `has_aux` parameter; when it is set to `True`, the effect of manually adding `stop_gradient` above is achieved automatically.
grad_fn = mindspore.grad(function_with_logits, (2, 3), has_aux=True)
grads, (z,) = grad_fn(x, y, w, b)
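Here the gradients are computed from the first output (the loss) only, while `z` is passed through unchanged as auxiliary data; the result should match the manual `stop_gradient` version above. A minimal sketch (illustrative):
# Illustrative check: has_aux=True gradients should match the manual
# stop_gradient version, and z is returned as auxiliary data.
grads_stop = mindspore.grad(function_stop_gradient, (2, 3))(x, y, w, b)
print(np.allclose(grads[0].asnumpy(), grads_stop[0].asnumpy()))  # expected: True
print(z.shape)  # (3,), the auxiliary logits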
Neural Network Gradient Computation
# Define the model
class Network(nn.Cell):
    def __init__(self):
        super().__init__()
        self.w = w
        self.b = b

    def construct(self, x):
        z = ops.matmul(x, self.w) + self.b
        return z
# Instantiate the model
model = Network()
# Instantiate the loss function
loss_fn = nn.BCEWithLogitsLoss()
# Define the forward pass
def forward_fn(x, y):
    z = model(x)
    loss = loss_fn(z, y)
    return loss
grad_fn = mindspore.value_and_grad(forward_fn, None, weights=model.trainable_params())
loss, grads = grad_fn(x, y)
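Since `grad_position` is `None` and `weights=model.trainable_params()` is passed, the returned gradients correspond to the model parameters and can be fed directly to an optimizer. A minimal single-step sketch (illustrative; the `nn.SGD` optimizer and learning rate are assumptions, not part of the original example):
# Illustrative single training step (assumed optimizer and learning rate):
# the optimizer Cell applies the gradients to model.trainable_params().
optimizer = nn.SGD(model.trainable_params(), learning_rate=1e-2)
loss, grads = grad_fn(x, y)
optimizer(grads)
print(loss)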