# 函数式自动微分

MindSpore使用函数式自动微分的设计理念，提供更接近于数学语义的自动微分接口`grad`和`value_and_grad`。下面我们使用一个简单的单层线性变换模型进行介绍。

[1]:

import numpy as np
import mindspore
from mindspore import nn
from mindspore import ops
from mindspore import Tensor, Parameter


## 函数与计算图

[2]:

# Inputs and parameters of a single linear layer mapping 5 features to 3 outputs.
x = ops.ones(5, mindspore.float32)  # input tensor
y = ops.zeros(3, mindspore.float32)  # expected output
# w and b are randomly initialised (unseeded), so printed values differ between runs.
w = Parameter(Tensor(np.random.randn(5, 3), mindspore.float32), name='w') # weight
b = Parameter(Tensor(np.random.randn(3,), mindspore.float32), name='b') # bias


[3]:

def function(x, y, w, b):
    """Compute the loss of a single linear layer.

    Args:
        x: Input tensor.
        y: Expected output (label) tensor.
        w: Weight parameter.
        b: Bias parameter.

    Returns:
        The binary cross-entropy-with-logits loss between the logits
        ``matmul(x, w) + b`` and ``y``.
    """
    # Logits of the linear transform.
    z = ops.matmul(x, w) + b
    # The two all-ones tensors fill the third and fourth positional arguments
    # (weight and pos_weight in the MindSpore API).
    loss = ops.binary_cross_entropy_with_logits(z, y, ops.ones_like(z), ops.ones_like(z))
    return loss


[4]:

# Run the forward computation. Note that `z` here holds the scalar loss
# returned by `function`, not the logits.
z = function(x, y, w, b)
print(z)

[4]:

Tensor(shape=[], dtype=Float32, value= 0.914285)


## 微分函数与梯度计算

调用`mindspore.grad`可以获得`function`的微分函数，这里用到它的两个入参：

• fn：待求导的函数。

• grad_position：指定求导输入位置的索引。

[5]:

# Build the differential function of `function`. (2, 3) are the positional
# indexes of `w` and `b` in function(x, y, w, b), i.e. the inputs to differentiate.
grad_fn = mindspore.grad(function, (2, 3))


[6]:

# Call the differential function with the same arguments as `function`;
# the result is a tuple holding the gradient for `w` followed by the one for `b`.
grads = grad_fn(x, y, w, b)
print(grads)

[6]:

(Tensor(shape=[5, 3], dtype=Float32, value=
[[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01]]),
Tensor(shape=[3], dtype=Float32, value= [ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01]))


## Stop Gradient

[7]:

def function_with_logits(x, y, w, b):
    """Compute the loss of a single linear layer and also return its logits.

    Args:
        x: Input tensor.
        y: Expected output (label) tensor.
        w: Weight parameter.
        b: Bias parameter.

    Returns:
        A tuple ``(loss, z)`` where ``z`` is ``matmul(x, w) + b`` and ``loss``
        is the binary cross-entropy-with-logits loss between ``z`` and ``y``.
    """
    # Logits of the linear transform.
    z = ops.matmul(x, w) + b
    # The two all-ones tensors fill the third and fourth positional arguments
    # (weight and pos_weight in the MindSpore API).
    loss = ops.binary_cross_entropy_with_logits(z, y, ops.ones_like(z), ops.ones_like(z))
    # z is returned as-is, so it stays part of the differentiated outputs.
    return loss, z

[8]:

# Differentiate the two-output function w.r.t. `w` and `b` without has_aux:
# the second output `z` also contributes to the gradients, so the printed
# values differ from the loss-only gradients.
grad_fn = mindspore.grad(function_with_logits, (2, 3))
grads = grad_fn(x, y, w, b)
print(grads)

[8]:

(Tensor(shape=[5, 3], dtype=Float32, value=
[[ 1.06568694e+00,  1.05373347e+00,  1.30146706e+00],
[ 1.06568694e+00,  1.05373347e+00,  1.30146706e+00],
[ 1.06568694e+00,  1.05373347e+00,  1.30146706e+00],
[ 1.06568694e+00,  1.05373347e+00,  1.30146706e+00],
[ 1.06568694e+00,  1.05373347e+00,  1.30146706e+00]]),
Tensor(shape=[3], dtype=Float32, value= [ 1.06568694e+00,  1.05373347e+00,  1.30146706e+00]))


[9]:

def function_stop_gradient(x, y, w, b):
    """Compute the loss and return the logits with their gradient truncated.

    Args:
        x: Input tensor.
        y: Expected output (label) tensor.
        w: Weight parameter.
        b: Bias parameter.

    Returns:
        A tuple ``(loss, z)`` where ``z`` is ``matmul(x, w) + b`` wrapped in
        ``ops.stop_gradient`` and ``loss`` is the binary
        cross-entropy-with-logits loss between ``z`` and ``y``.
    """
    # Logits of the linear transform.
    z = ops.matmul(x, w) + b
    # The two all-ones tensors fill the third and fourth positional arguments
    # (weight and pos_weight in the MindSpore API).
    loss = ops.binary_cross_entropy_with_logits(z, y, ops.ones_like(z), ops.ones_like(z))
    # Truncate the gradient of the second output so only the loss is differentiated.
    return loss, ops.stop_gradient(z)

[10]:

# Same differentiation as before, but the second output is wrapped in
# stop_gradient, so the printed gradients match the loss-only case.
grad_fn = mindspore.grad(function_stop_gradient, (2, 3))
grads = grad_fn(x, y, w, b)
print(grads)

[10]:

(Tensor(shape=[5, 3], dtype=Float32, value=
[[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01]]),
Tensor(shape=[3], dtype=Float32, value= [ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01]))


## Auxiliary data

Auxiliary data意为辅助数据，是函数除第一个输出项外的其他输出。通常我们会将函数的loss设置为函数的第一个输出，其他的输出即为辅助数据。

`grad`和`value_and_grad`提供`has_aux`参数，当其设置为`True`时，可以自动实现前文手动添加`stop_gradient`的功能，满足返回辅助数据的同时不影响梯度计算的效果。

[11]:

# has_aux=True: only the first output (the loss) is differentiated; the other
# outputs are treated as auxiliary data, so no manual stop_gradient is needed.
grad_fn = mindspore.grad(function_with_logits, (2, 3), has_aux=True)

[12]:

# With has_aux=True the call yields the gradients plus a tuple of auxiliary
# outputs; here that tuple has one element, the logits `z`.
grads, (z,) = grad_fn(x, y, w, b)
print(grads, z)

[12]:

((Tensor(shape=[5, 3], dtype=Float32, value=
[[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01]]),
Tensor(shape=[3], dtype=Float32, value= [ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01])),
Tensor(shape=[3], dtype=Float32, value= [-1.40476596e+00, -1.64932394e+00,  2.24711204e+00]))


## 神经网络梯度计算

[13]:

# Define model
class Network(nn.Cell):
    """Single linear layer built on the module-level parameters ``w`` and ``b``."""

    def __init__(self):
        """Store the module-level ``w`` and ``b`` as attributes of the cell."""
        super().__init__()
        # Reuse the parameters defined earlier in the file rather than creating new ones.
        self.w = w
        self.b = b

    def construct(self, x):
        """Return the logits ``matmul(x, self.w) + self.b`` for input ``x``."""
        z = ops.matmul(x, self.w) + self.b
        return z


[14]:

# Instantiate model
# Instantiate model
model = Network()
# Instantiate loss function (takes the model's logits and the labels)
loss_fn = nn.BCEWithLogitsLoss()


[15]:

# Define forward function
def forward_fn(x, y):
    """Run the forward pass and return the loss.

    Uses the module-level ``model`` and ``loss_fn``.

    Args:
        x: Input tensor fed to ``model``.
        y: Expected output (label) tensor passed to ``loss_fn``.

    Returns:
        The loss computed by ``loss_fn`` on the model's logits and ``y``.
    """
    z = model(x)
    loss = loss_fn(z, y)
    return loss


[16]:

# Differentiate forward_fn w.r.t. the model's trainable parameters only: the
# second positional argument (grad_position in the MindSpore API) is None, so
# no gradients are requested for the inputs x and y.
grad_fn = mindspore.value_and_grad(forward_fn, None, weights=model.trainable_params())

[17]:

# value_and_grad returns the forward result (the loss) together with the
# gradients of the weights passed via `weights`.
loss, grads = grad_fn(x, y)
print(grads)

[17]:

(Tensor(shape=[5, 3], dtype=Float32, value=
[[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01],
[ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01]]),
Tensor(shape=[3], dtype=Float32, value= [ 6.56869709e-02,  5.37334494e-02,  3.01467031e-01]))