# Function Differences with tf.compat.v1.train.MomentumOptimizer

## tf.compat.v1.train.MomentumOptimizer

```text
tf.compat.v1.train.MomentumOptimizer(
    learning_rate,
    momentum,
    use_locking=False,
    name='Momentum',
    use_nesterov=False
) -> Tensor
```

For more information, see [tf.compat.v1.train.MomentumOptimizer](https://tensorflow.google.cn/versions/r2.6/api_docs/python/tf/compat/v1/train/MomentumOptimizer).

## mindspore.nn.Momentum

```text
class mindspore.nn.Momentum(
    params,
    learning_rate,
    momentum,
    weight_decay=0.0,
    loss_scale=1.0,
    use_nesterov=False
)(gradients) -> Tensor
```

For more information, see [mindspore.nn.Momentum](https://www.mindspore.cn/docs/zh-CN/r2.0.0-alpha/api_python/nn/mindspore.nn.Momentum.html).

## Differences

TensorFlow: an optimizer that implements the Momentum algorithm.

MindSpore: this API of MindSpore implements basically the same function as TensorFlow.

| Categories | Subcategories | TensorFlow | MindSpore | Differences |
| --- | --- | --- | --- | --- |
| Parameters | Parameter 1 | learning_rate | learning_rate | - |
| | Parameter 2 | momentum | momentum | - |
| | Parameter 3 | use_locking | - | In TensorFlow, whether to use a lock for the update operation. Default: False. MindSpore does not have this parameter. |
| | Parameter 4 | name | - | Not involved |
| | Parameter 5 | use_nesterov | use_nesterov | - |
| | Parameter 6 | - | params | A list of Parameters or a list of dicts. TensorFlow does not have this parameter. |
| | Parameter 7 | - | weight_decay | Weight decay (L2 penalty). Default: 0.0. TensorFlow does not have this parameter. |
| | Parameter 8 | - | loss_scale | Gradient scaling coefficient. Default: 1.0. TensorFlow does not have this parameter. |
| | Parameter 9 | - | gradients | Gradients of `params`, passed when the constructed optimizer is called (see the sketch after this table). TensorFlow does not have this parameter. |
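The `params` and `gradients` rows reflect the main interface difference: TensorFlow's optimizer computes and applies gradients itself through `minimize`, while `mindspore.nn.Momentum` is constructed with `params` and is then called as a `Cell` on externally computed `gradients`. Below is a minimal sketch of that pattern; the network, data, and parameter grouping are hypothetical and chosen only for illustration:

```python
import numpy as np
import mindspore as ms
from mindspore import nn, Tensor

net = nn.Dense(3, 3)
loss_fn = nn.MSELoss()

# `params` accepts a list of Parameters or a list of dicts; this grouping,
# which gives the bias its own learning rate, is a hypothetical example.
weights = [p for p in net.trainable_params() if 'weight' in p.name]
biases = [p for p in net.trainable_params() if 'bias' in p.name]
optimizer = nn.Momentum(params=[{'params': weights},
                                {'params': biases, 'lr': 0.01}],
                        learning_rate=0.1, momentum=0.9,
                        weight_decay=0.0, loss_scale=1.0)

def forward(x, y):
    return loss_fn(net(x), y)

# Compute the gradients outside the optimizer ...
grad_fn = ms.value_and_grad(forward, None, optimizer.parameters)
x = Tensor(np.random.rand(2, 3).astype(np.float32))
y = Tensor(np.random.rand(2, 3).astype(np.float32))
loss, grads = grad_fn(x, y)
# ... then pass them to the optimizer Cell to apply one update step.
optimizer(grads)
```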
### Code Example

> The two APIs implement basically the same function.

```python
# TensorFlow
import numpy as np
import tensorflow as tf

def forward_tensorflow_impl(input_np, label_np_onehot, output_channels, weight_np,
                            has_bias, bias_np, epoch, locking=False, lr=0.1,
                            momentum=0.0, use_nesterov=False, dtype=np.float32):
    tf.compat.v1.disable_eager_execution()
    input_tf = tf.constant(input_np, dtype=np.float32)
    label = tf.constant(label_np_onehot)
    if has_bias:
        net = tf.compat.v1.layers.dense(
            inputs=input_tf, units=output_channels, use_bias=True,
            kernel_initializer=tf.compat.v1.constant_initializer(
                weight_np.transpose(1, 0), dtype=np.float32),
            bias_initializer=tf.compat.v1.constant_initializer(bias_np, dtype=np.float32))
    else:
        net = tf.compat.v1.layers.dense(
            inputs=input_tf, units=output_channels, use_bias=False,
            kernel_initializer=tf.compat.v1.constant_initializer(
                weight_np.transpose(1, 0), dtype=np.float32))
    criterion = tf.compat.v1.losses.softmax_cross_entropy(
        onehot_labels=label, logits=net,
        reduction=tf.compat.v1.losses.Reduction.MEAN)
    # The TensorFlow optimizer computes and applies the gradients itself.
    opt = tf.compat.v1.train.MomentumOptimizer(
        learning_rate=lr, momentum=momentum, use_locking=locking,
        use_nesterov=use_nesterov).minimize(criterion)
    init = tf.compat.v1.global_variables_initializer()
    with tf.compat.v1.Session() as ss:
        ss.run(init)
        for _ in range(epoch):
            criterion.eval()
            ss.run(opt)
        output = net.eval()
    return output.astype(dtype)

input_np = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], dtype=np.float32)
weight_np = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], dtype=np.float32)
label_np = np.array([0, 2, 1])
label_np_onehot = np.zeros(shape=(3, 3)).astype(np.float32)
label_np_onehot[np.arange(3), label_np] = 1.0
has_bias = True
bias_np = np.array([0.1, 0.2, 0.3], dtype=np.float32)
input_channels = 3
output_channels = 3
epoch = 3
output = forward_tensorflow_impl(input_np, label_np_onehot, output_channels, weight_np,
                                 has_bias, bias_np, epoch, locking=False, lr=0.1,
                                 momentum=0.0, use_nesterov=False, dtype=np.float32)
print(output)
# [[0.28361297 0.5488669  0.72752017]
#  [0.4602843  1.0305372  1.4191785 ]
#  [0.6369556  1.5122076  2.1108367 ]]

# MindSpore
import numpy as np
from mindspore import Tensor
from mindspore.nn import Momentum
from mindspore.nn import Dense, SoftmaxCrossEntropyWithLogits
from mindspore.nn import WithLossCell, TrainOneStepCell

def forward_mindspore_impl(input_np, weight_np, label_np_onehot, has_bias, bias_np,
                           input_channels, output_channels, epoch, learning_rate=0.1,
                           weight_decay=0.0, momentum=0.0, loss_scale=1.0,
                           use_nesterov=False):
    input = Tensor(input_np)
    weight = Tensor(weight_np)
    label = Tensor(label_np_onehot)
    if has_bias:
        bias = Tensor(bias_np)
        net = Dense(in_channels=input_channels, out_channels=output_channels,
                    weight_init=weight, bias_init=bias, has_bias=True)
    else:
        net = Dense(in_channels=input_channels, out_channels=output_channels,
                    weight_init=weight, has_bias=False)
    criterion = SoftmaxCrossEntropyWithLogits(reduction='mean')
    # The MindSpore optimizer is constructed with `params` and later receives
    # the gradients computed by TrainOneStepCell.
    optimizer = Momentum(params=net.trainable_params(), learning_rate=learning_rate,
                         weight_decay=weight_decay, momentum=momentum,
                         loss_scale=loss_scale, use_nesterov=use_nesterov)
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion, optimizer)
    train_network.set_train()
    for _ in range(epoch):
        train_network(input, label)
    output = net(input)
    return output.asnumpy()

input_np = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], dtype=np.float32)
weight_np = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], dtype=np.float32)
label_np = np.array([0, 2, 1])
label_np_onehot = np.zeros(shape=(3, 3)).astype(np.float32)
label_np_onehot[np.arange(3), label_np] = 1.0
has_bias = True
bias_np = np.array([0.1, 0.2, 0.3], dtype=np.float32)
input_channels = 3
output_channels = 3
epoch = 3
out = forward_mindspore_impl(input_np, weight_np, label_np_onehot, has_bias, bias_np,
                             input_channels, output_channels, epoch, learning_rate=0.1,
                             weight_decay=0.0, momentum=0.0, loss_scale=1.0,
                             use_nesterov=False)
print(out)
# [[0.28361297 0.5488669  0.72752017]
#  [0.4602843  1.0305372  1.4191784 ]
#  [0.6369556  1.5122076  2.1108367 ]]
```
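For reference on `use_nesterov`, the update rule documented for `mindspore.nn.Momentum` is sketched below; TensorFlow's `MomentumOptimizer` follows the same scheme. Here \(p\) is the parameter, \(g\) the gradient, \(v\) the accumulated moment, \(\mu\) the momentum coefficient, and \(\alpha\) the learning rate:

```latex
v_{t+1} = \mu \, v_t + g_t
\qquad
p_{t+1} =
\begin{cases}
  p_t - \alpha \, (g_t + \mu \, v_{t+1}), & \text{if use\_nesterov} \\
  p_t - \alpha \, v_{t+1},                & \text{otherwise}
\end{cases}
```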