# 静态图网络编译性能优化

## 使用HyperMap优化编译性能

HyperMap是一个特殊的类，类对象构造时需要传入映射函数f，调用对象时需要传入f的n个参数序列，更多使用方法见：HyperMap。映射函数f必须是MultitypeFuncGraph类型, 可参考MultitypeFuncGraph。在使用for循环批量处理列表元素时，可以通过HyperMap等价语义替换来优化网络编译性能。

[1]:

import time
from mindspore.ops import MultitypeFuncGraph, HyperMap
from mindspore import ops, Tensor
import mindspore as ms

# Fix: the original cell called hyper_map_net() returning an undefined
# `output` — the MultitypeFuncGraph registration and the HyperMap object
# that produce it were missing. Restored below.
add = MultitypeFuncGraph('add')


@add.register("Tensor", "Tensor")
def add_tensor(x, y):
    """Elementwise add; dispatched by HyperMap for (Tensor, Tensor) pairs."""
    return ops.add(x, y)


# HyperMap applies the registered mapping function across input sequences.
add_map = HyperMap(add)

list1 = [Tensor(i) for i in range(200)]
list2 = [Tensor(i) for i in range(200)]


@ms.jit
def hyper_map_net():
    """Batch-add list1 and list2 via HyperMap (single mapped op, fast compile)."""
    output = add_map(list1, list2)
    return output


start_time = time.time()
output = hyper_map_net()
end_time = time.time()
print("hyper map cost time:", end_time - start_time)


@ms.jit
def for_loop_net():
    """Equivalent batch computation written as a Python for loop.

    Each iteration is unrolled at graph-compile time, so compilation is
    much slower than the HyperMap version above.
    """
    out = []
    for i in range(200):
        out.append(i + i)
    return out


start_time = time.time()
for_loop_net()
end_time = time.time()
print("for loop cost time:", end_time - start_time)

hyper map cost time: 0.1894233226776123
for loop cost time: 1.2634551525115967


## 使用Select算子优化编译性能

[2]:

import time
from mindspore import ops
import mindspore as ms


@ms.jit
def if_net(x, y):
    """Accumulate x over 100 steps, branching with Python if/else.

    Each data-dependent branch forks control flow in the compiled graph,
    which slows compilation.
    """
    out = 0
    for _step in range(100):
        # Same branch logic as before, written with the negated condition.
        if x >= y:
            x = x + y
        else:
            x = x - y
        out = out + x
    return out


start_time = time.time()
out = if_net(ms.Tensor([0]), ms.Tensor([1]))
end_time = time.time()
print("if net cost time:", end_time - start_time)


@ms.jit
def select_net(x, y):
    """Same accumulation, but using ops.Select instead of if/else.

    Select computes both candidates and picks one elementwise, so the
    graph contains no control-flow fork and compiles faster.
    """
    out = x
    for _step in range(100):
        is_less = x < y
        x = ops.Select()(is_less, x - y, x + y)
        out = out + x
    return out


start_time = time.time()
out = select_net(ms.Tensor([0]), ms.Tensor([1]))
end_time = time.time()
print("select net cost time:", end_time - start_time)

if net cost time: 1.1603329181671143
select net cost time: 0.483151912689209


## 使用编译缓存优化编译性能

[3]:

import time
from mindspore import set_context
from mindspore import dtype
import mindspore as ms


@ms.jit
def func(input_x, input_y):
    """Accumulate input_x + input_x * input_y for 200 iterations.

    The loop is unrolled at compile time, making the graph large enough
    for compilation cost to be measurable.
    """
    output = input_x
    for _ in range(200):
        output = input_x + input_x * input_y + output
    return output


# With the compile cache disabled, the graph is recompiled on every run.
set_context(enable_compile_cache=False)
x = ms.Tensor([1], dtype.float32)
y = ms.Tensor([2], dtype.float32)
start_time = time.time()
out = func(x, y)
end_time = time.time()
# Fix: original message misspelled "compile_cache" as "comile_cache".
print("Disable compile_cache cost time:", end_time - start_time)

Disable compile_cache cost time: 0.5485098361968994


Disable compile_cache cost time: 0.5485098361968994

Disable compile_cache cost time: 0.4614279270172119


[4]:

import time
from mindspore import set_context
from mindspore import dtype
import mindspore as ms


@ms.jit
def func(input_x, input_y):
    """Accumulate input_x + input_x * input_y for 200 iterations.

    Identical to the cache-disabled cell so the timings are comparable.
    """
    output = input_x
    for _ in range(200):
        output = input_x + input_x * input_y + output
    return output


# Enable the compile cache: the first run compiles and stores the graph
# under compile_cache_path; later runs load it and skip recompilation.
set_context(enable_compile_cache=True, compile_cache_path="my_compile_cache")
x = ms.Tensor([1], dtype.float32)
y = ms.Tensor([2], dtype.float32)
start_time = time.time()
out = func(x, y)
end_time = time.time()
# Fix: original message misspelled "compile_cache" as "comile_cache".
print("Enable compile_cache cost time:", end_time - start_time)

Enable compile_cache cost time: 0.6357541084289551


Enable compile_cache cost time: 0.6357541084289551

Enable compile_cache cost time: 0.09379792213439941


## 使用vmap优化编译性能

MindSpore当前已经支持vmap特性，在处理无依赖关系的批量数据且相关的算子支持vmap功能时，可以使用vmap替代for循环处理批量数据来优化编译性能。vmap的详细介绍可参考vmap。需要注意的是，vmap不仅能优化编译性能，也能优化运行性能。

[5]:

import numpy as np
import time
from mindspore import ops, vmap
import mindspore as ms


def hswish_func(x):
    """Apply the HSwish activation to a single tensor."""
    return ops.HSwish()(x)


@ms.jit
def manually_batched(xs):
    """For-loop baseline: apply hswish_func row by row, then stack.

    Every iteration is unrolled into the graph, so compilation is slow
    compared with the vmap version below.
    """
    rows = []
    for row in range(xs.shape[0]):
        rows.append(hswish_func(xs[row]))
    return ops.stack(rows)


sample_shape = (100, 2)
scale = 100
raw = (np.random.randn(*sample_shape) * scale).astype(np.float32)
x = ms.Tensor(raw)
# No-op subtraction — presumably routes the data through an op so the
# tensor is produced by the framework; TODO confirm intent.
x = ops.sub(x, 0)

start_time = time.time()
output_vmap = vmap(hswish_func, in_axes=(0,))(x)
end_time = time.time()
print("vmap cost time:", end_time - start_time)

start_time = time.time()
output_manually = manually_batched(x)
end_time = time.time()
print("for loop cost time:", end_time - start_time)

vmap cost time: 0.05766916275024414
for loop cost time: 1.9284062385559082