Function Differences with tf.compat.v1.train.ProximalAdagradOptimizer

tf.compat.v1.train.ProximalAdagradOptimizer

tf.compat.v1.train.ProximalAdagradOptimizer(
    learning_rate,
    initial_accumulator_value=0.1,
    l1_regularization_strength=0.0,
    l2_regularization_strength=0.0,
    use_locking=False,
    name='ProximalAdagrad'
) -> Tensor

For more information, see tf.compat.v1.train.ProximalAdagradOptimizer.

mindspore.nn.ProximalAdagrad

class mindspore.nn.ProximalAdagrad(
    params,
    accum=0.1,
    learning_rate=0.001,
    l1=0.0,
    l2=0.0,
    use_locking=False,
    loss_scale=1.0,
    weight_decay=0.0
)(grads) -> Tensor

For more information, see mindspore.nn.ProximalAdagrad.
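
The trailing (grads) in the MindSpore signature reflects that the optimizer is itself a Cell: constructing it binds params, and calling the instance with the gradients of those parameters applies one update step. The sketch below is illustrative only (the small nn.Dense network, the loss, and the input shapes are assumptions, not taken from this page); it hand-writes what nn.TrainOneStepCell does in the full example further down.

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, ops

class SimpleTrainStep(nn.Cell):
    """Hand-written equivalent of nn.TrainOneStepCell, for illustration only."""
    def __init__(self, network, optimizer):
        super(SimpleTrainStep, self).__init__()
        self.network = network                      # Cell that returns the loss
        self.optimizer = optimizer                  # any MindSpore optimizer
        self.weights = optimizer.parameters
        self.grad = ops.GradOperation(get_by_list=True)

    def construct(self, x, y):
        loss = self.network(x, y)
        grads = self.grad(self.network, self.weights)(x, y)
        self.optimizer(grads)                       # calling the optimizer applies one update
        return loss

net = nn.Dense(3, 2)                                # illustrative network
net_with_loss = nn.WithLossCell(net, nn.MSELoss())
optimizer = nn.ProximalAdagrad(params=net.trainable_params(), learning_rate=0.001)
train_step = SimpleTrainStep(net_with_loss, optimizer)
x = Tensor(np.ones((4, 3)).astype(np.float32))
y = Tensor(np.zeros((4, 2)).astype(np.float32))
loss = train_step(x, y)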

Differences

TensorFlow: Implements the ProximalAdagrad optimization algorithm.

MindSpore: The MindSpore API implements basically the same function as TensorFlow.
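
Both implement the ProximalAdagrad update rule. As a reference, one update step can be sketched in NumPy roughly as follows (an illustrative helper, not part of either API):

import numpy as np

def proximal_adagrad_step(var, accum, grad, lr=0.001, l1=0.0, l2=0.0):
    # Accumulate squared gradients, take an Adagrad step, then apply the
    # proximal operator induced by the l1/l2 regularization strengths.
    accum = accum + grad * grad
    prox_v = var - lr * grad / np.sqrt(accum)
    var = np.sign(prox_v) / (1.0 + lr * l2) * np.maximum(np.abs(prox_v) - lr * l1, 0.0)
    return var, accum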

| Categories | Subcategories | TensorFlow | MindSpore | Differences |
| ---------- | ------------- | ---------- | --------- | ----------- |
| Parameters | parameter 1 | learning_rate | learning_rate | Same function; TensorFlow has no default value |
| Parameters | parameter 2 | initial_accumulator_value | accum | Same function, different parameter names |
| Parameters | parameter 3 | l1_regularization_strength | l1 | Same function, different parameter names |
| Parameters | parameter 4 | l2_regularization_strength | l2 | Same function, different parameter names |
| Parameters | parameter 5 | use_locking | use_locking | - |
| Parameters | parameter 6 | name | - | Not involved |
| Parameters | parameter 7 | - | params | A list of Parameter objects or a list of dictionaries; TensorFlow does not have this parameter |
| Parameters | parameter 8 | - | loss_scale | Gradient scaling factor. Default value: 1.0. TensorFlow does not have this parameter |
| Parameters | parameter 9 | - | weight_decay | Weight decay (L2 penalty). Default value: 0.0. TensorFlow does not have this parameter |
| Parameters | parameter 10 | - | grads | Gradients of params in the optimizer. TensorFlow does not have this parameter |
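
Because params is passed explicitly, MindSpore also accepts it as a list of dictionaries that sets hyperparameters such as lr and weight_decay per parameter group, something the TensorFlow constructor does not expose. A minimal sketch (the small nn.Dense network is only for illustration):

import mindspore.nn as nn

net = nn.Dense(3, 2)  # illustrative network
# The first group uses its own weight_decay, the second group overrides the
# learning rate; anything not specified falls back to the optimizer's keyword arguments.
group_params = [{'params': [net.weight], 'weight_decay': 0.01},
                {'params': [net.bias], 'lr': 0.01}]
optimizer = nn.ProximalAdagrad(group_params, learning_rate=0.001, weight_decay=0.0)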

Code Example

The two APIs achieve the same function, and the examples below produce the same results.

# TensorFlow
import tensorflow as tf
import numpy as np

tf.compat.v1.disable_v2_behavior()
tf.compat.v1.disable_eager_execution()

param_np = np.ones(7).astype(np.float32)
indices = np.array([1, 2, 3, 4, 5, 6]).astype(np.int32)
label = np.zeros((2, 3)).astype(np.float32)
label_shape = (2, 3)
axis = 0
epoch = 3
param_tf = tf.Variable(param_np)
indices_tf = tf.Variable(indices)
label = tf.Variable(label)
# Gather six of the seven parameters and reshape them to the label shape (2, 3).
net = tf.raw_ops.GatherV2(params=param_tf, indices=indices_tf, axis=axis, batch_dims=0, name=None)
net = tf.reshape(net, label_shape)
criterion = tf.compat.v1.losses.softmax_cross_entropy(onehot_labels=label, logits=net,
                                                      reduction=tf.compat.v1.losses.Reduction.MEAN)
opt = tf.compat.v1.train.ProximalAdagradOptimizer(learning_rate=0.001).minimize(criterion)
init = tf.compat.v1.global_variables_initializer()
with tf.compat.v1.Session() as ss:
    ss.run(init)
    # Run three optimization steps, then fetch the updated network output.
    for i in range(epoch):
        loss = criterion.eval()
        ss.run(opt)
    output = net.eval()
out_tf = output.astype(np.float32)
print(out_tf)
# [[0.9987219 0.9987219 0.9987219]
#  [0.9987219 0.9987219 0.9987219]]

# MindSpore
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as op
from mindspore import Parameter
from mindspore.nn import SoftmaxCrossEntropyWithLogits
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore.nn import Cell

class NetWithGatherV2(Cell):
    def __init__(self, param_np, label_shape, axis=0):
        super(NetWithGatherV2, self).__init__()
        self.param = Parameter(Tensor(param_np), name="w1")
        self.gatherv2 = op.GatherV2()
        self.reshape = op.Reshape()
        self.axis = axis
        self.label_shape = label_shape

    def construct(self, indices):
        # Gather entries of the trainable parameter and reshape them to the label shape.
        x = self.gatherv2(self.param, indices, self.axis)
        return self.reshape(x, self.label_shape)


param_np = np.ones(7).astype(np.float32)
indices = np.array([1, 2, 3, 4, 5, 6]).astype(np.int32)
label = np.zeros((2, 3)).astype(np.float32)
label_shape = (2, 3)
epoch = 3
inputs = Tensor(indices)
label = Tensor(label)
net = NetWithGatherV2(param_np, label_shape, axis=0)
criterion = SoftmaxCrossEntropyWithLogits(reduction='mean')
# learning_rate defaults to 0.001, matching the TensorFlow example above.
optimizer = nn.ProximalAdagrad(params=net.trainable_params())
net_with_criterion = WithLossCell(net, criterion)
train_network = TrainOneStepCell(net_with_criterion, optimizer)
train_network.set_train()
for i in range(epoch):
    train_network(inputs, label)
out_ms = net(inputs).asnumpy()
print(out_ms)
# [[0.9987219 0.9987219 0.9987219]
#  [0.9987219 0.9987219 0.9987219]]