[{"data":1,"prerenderedAt":744},["ShallowReactive",2],{"content-query-ngksPmVjG8":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":738,"_id":739,"_source":740,"_file":741,"_stem":742,"_extension":743},"/technology-blogs/zh/671","zh",false,"","大V博文系列：MindSpore GPU推理性能优化实践","MindSpore 1.3.0版本对GPU推理性能进行优化，性能大幅提升","2021-08-04","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2021/08/04/5f91543bb8cb4f56abf7d5207d99d20b.png","technology-blogs","大V博文",{"type":15,"children":16,"toc":716},"root",[17,25,31,44,55,62,67,73,84,89,98,103,108,113,119,128,133,142,151,156,162,167,174,179,187,192,197,204,209,216,221,227,232,240,245,253,265,271,276,281,289,300,306,329,335,340,346,351,356,361,371,379,384,389,397,402,410,418,423,431,439,444,449,457,463,468,475,481,486,493,498,510,515,520,525,533,538,546,551,559,564,572,577,582,590,595,603,608,613,620,625,632,637,644,649,656,661,668,673,680,685,692,697,704,709],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"大v博文系列mindspore-gpu推理性能优化实践",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":28},"p",{},[29],{"type":24,"value":30},"作者：金雪锋",{"type":18,"tag":26,"props":32,"children":33},{},[34,36],{"type":24,"value":35},"作者主页：",{"type":18,"tag":37,"props":38,"children":42},"a",{"href":39,"rel":40},"https://www.zhihu.com/people/jin-xue-feng",[41],"nofollow",[43],{"type":24,"value":39},{"type":18,"tag":26,"props":45,"children":46},{},[47,49],{"type":24,"value":48},"文章来源：",{"type":18,"tag":37,"props":50,"children":53},{"href":51,"rel":52},"https://zhuanlan.zhihu.com/p/396113352",[41],[54],{"type":24,"value":51},{"type":18,"tag":56,"props":57,"children":59},"h2",{"id":58},"_1-引言",[60],{"type":24,"value":61},"1 引言",{"type":18,"tag":26,"props":63,"children":64},{},[65],{"type":24,"value":66},"现今，先进的 AI 
服务愈加受到市场青睐，其中包括图像和语音识别、自然语言处理、视觉搜索和个性化推荐。随着数据集和网络规模越来越大，网络计算量也越来越大；与此同时，交互式推理任务对时延有严格要求，因此深度神经网络推理任务逐渐向GPU进行迁移。为此，MindSpore 1.3.0版本对GPU推理性能进行优化，性能相比此前大幅提升。",{"type":18,"tag":56,"props":68,"children":70},{"id":69},"_2-推理和训练差异",[71],{"type":24,"value":72},"2 推理和训练差异",{"type":18,"tag":74,"props":75,"children":77},{"id":76},"_21-学-vs-用",[78],{"type":18,"tag":79,"props":80,"children":81},{},[82],{"type":24,"value":83},"2.1 学 vs 用",{"type":18,"tag":26,"props":85,"children":86},{},[87],{"type":24,"value":88},"通常深度学习将“学以致用”分为“学习”和“应用”两个阶段的任务。前者的目的是得到一个能够用于拟合经验数据的模型，在深度学习领域称为训练（training）；后者是在未知数据上进行预测，在深度学习领域称为推理（Inference）。",{"type":18,"tag":74,"props":90,"children":92},{"id":91},"_22-吞吐率-vs-时延",[93],{"type":18,"tag":79,"props":94,"children":95},{},[96],{"type":24,"value":97},"2.2 吞吐率 vs 时延",{"type":18,"tag":26,"props":99,"children":100},{},[101],{"type":24,"value":102},"与训练追求高吞吐率（Throughput）不同，推理强调低时延（latency），二者虽然都可以简单表述为“快”，但在本质上是有区别的。",{"type":18,"tag":26,"props":104,"children":105},{},[106],{"type":24,"value":107},"在神经网络训练过程中，数据是一批一批处理的，其中 Batch Size是一个重要的参数：一个Batch Size中的所有样本共同决定模型参数更新的方向，当Batch Size过小时，这一批次数据不能反映整个数据集的全局特征，无法准确地给出参数收敛的方向，通常导致模型难以收敛。由于受限于显存容量，有时大模型需要采用“数据并行”、“梯度累积”的方式，进一步提升Batch Size，从而提升模型收敛速度。因此，对于训练任务，需要解决的是如何尽快地完成一个批次数据的计算。",{"type":18,"tag":26,"props":109,"children":110},{},[111],{"type":24,"value":112},"在推理任务中，请求往往是独立到达的，此时程序需要尽快给出响应，特别是交互式推理任务中对时延有极高要求，在服务合同中称为SLO（Service Level Objectives）。试想举着手机在拍照，还未等对焦完成，手不由自主地抖了一下，这时不得不再次对焦，这种体验并不友好。",{"type":18,"tag":56,"props":114,"children":116},{"id":115},"_3-低时延对框架的诉求",[117],{"type":24,"value":118},"3 
低时延对框架的诉求",{"type":18,"tag":74,"props":120,"children":122},{"id":121},"_31高效runtime",[123],{"type":18,"tag":79,"props":124,"children":125},{},[126],{"type":24,"value":127},"3.1高效Runtime：",{"type":18,"tag":26,"props":129,"children":130},{},[131],{"type":24,"value":132},"一般我们不在GPU中做指令（算子到算子之间）跳转，模型中的算子需要由CPU下发；CPU和GPU之间采用pipeline方式执行，GPU在执行当前算子时，CPU可以继续构建并下发后续算子，这意味着最终的时延受限于二者中较慢的设备。对于典型CNN的网络，训练一个step约几十至几百毫秒，而处理一条推理请求只需要几毫秒。也就是说，推理任务对Runtime的性能要求是远高于训练的。从GPU的演进看，堆叠流处理器数量是提升Tensor处理速度的有效手段，通常每一代GPU相比上一代提升均在几倍到十几倍之间，而CPU的单核处理能力增长有限，此时CPU就更容易成为性能瓶颈。这也就意味着，随着GPU的迭代，留给Runtime的时间将越来越少。",{"type":18,"tag":26,"props":134,"children":135},{},[136],{"type":18,"tag":137,"props":138,"children":141},"img",{"alt":139,"src":140},"1.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/092010ldztqtxvj9lnfczb.jpg",[],{"type":18,"tag":74,"props":143,"children":145},{"id":144},"_32-更加丰富的图优化",[146],{"type":18,"tag":79,"props":147,"children":148},{},[149],{"type":24,"value":150},"3.2 更加丰富的图优化",{"type":18,"tag":26,"props":152,"children":153},{},[154],{"type":24,"value":155},"相比于训练，通常推理具备更大的优化空间。",{"type":18,"tag":74,"props":157,"children":159},{"id":158},"_321-常量折叠",[160],{"type":24,"value":161},"3.2.1 常量折叠",{"type":18,"tag":26,"props":163,"children":164},{},[165],{"type":24,"value":166},"以BatchNorm为例，其算法如下：",{"type":18,"tag":26,"props":168,"children":169},{},[170],{"type":18,"tag":137,"props":171,"children":173},{"alt":7,"src":172},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/092119kadb7c1rkqfhhnqy.png",[],{"type":18,"tag":26,"props":175,"children":176},{},[177],{"type":24,"value":178},"μ、ρ、γ、β是模型的权重，在训练阶段 
μ和ρ通常以EMA方式进行更新，γ和β以梯度下降方式更新：",{"type":18,"tag":26,"props":180,"children":181},{},[182],{"type":18,"tag":137,"props":183,"children":186},{"alt":184,"src":185},"3.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/092209bgu5ydobfmklnme7.jpg",[],{"type":18,"tag":26,"props":188,"children":189},{},[190],{"type":24,"value":191},"在推理阶段，μ、ρ、γ、β已经固定，因此可以将部分计算提前到部署推理任务之前。",{"type":18,"tag":26,"props":193,"children":194},{},[195],{"type":24,"value":196},"令：",{"type":18,"tag":26,"props":198,"children":199},{},[200],{"type":18,"tag":137,"props":201,"children":203},{"alt":7,"src":202},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/0923408xdtwyxdazu6xqw4.png",[],{"type":18,"tag":26,"props":205,"children":206},{},[207],{"type":24,"value":208},"在模型部署后，BatchNorm仅需要计算",{"type":18,"tag":26,"props":210,"children":211},{},[212],{"type":18,"tag":137,"props":213,"children":215},{"alt":7,"src":214},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/0924126nakddulq7o97fxj.png",[],{"type":18,"tag":26,"props":217,"children":218},{},[219],{"type":24,"value":220},"省去了add，sub，mul，div，pow，sqrt等计算，降低计算量，提升推理性能。",{"type":18,"tag":74,"props":222,"children":224},{"id":223},"_322-算子融合",[225],{"type":24,"value":226},"3.2.2 
算子融合",{"type":18,"tag":26,"props":228,"children":229},{},[230],{"type":24,"value":231},"在图优化中，算子融合一向是提升执行性能最有效的手段之一。训练执行流程如下图所示，正反向算子之间往往需要数据通信，这一定程度上阻止了相邻算子之间的融合，或者降低融合的效果。",{"type":18,"tag":26,"props":233,"children":234},{},[235],{"type":18,"tag":137,"props":236,"children":239},{"alt":237,"src":238},"19.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/0925138g6qc30ztbpecwbz.jpg",[],{"type":18,"tag":26,"props":241,"children":242},{},[243],{"type":24,"value":244},"在推理时不存在反向图结构，这带来更大的算子融合空间。例如在推理时，我们可以将多个Conv2d融合成为一个算子，提升GPU的利用率",{"type":18,"tag":26,"props":246,"children":247},{},[248],{"type":18,"tag":137,"props":249,"children":252},{"alt":250,"src":251},"7.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/092533sduhp93xm1oiwfob.jpg",[],{"type":18,"tag":26,"props":254,"children":255},{},[256,258],{"type":24,"value":257},"参考：",{"type":18,"tag":37,"props":259,"children":262},{"href":260,"rel":261},"https://link.zhihu.com/?target=https://arxiv.org/pdf/2102.02344.pdf",[41],[263],{"type":24,"value":264},"https://arxiv.org/pdf/2102.02344.pdf",{"type":18,"tag":74,"props":266,"children":268},{"id":267},"_323-低精度计算",[269],{"type":24,"value":270},"3.2.3 低精度计算",{"type":18,"tag":26,"props":272,"children":273},{},[274],{"type":24,"value":275},"在深度神经网络中，梯度通常在0附近，优化器驱动权重以Learning 
Rate为步长更新。低精度数据类型表示的动态范围有限，容易出现梯度消失，导致模型得不到更新。",{"type":18,"tag":26,"props":277,"children":278},{},[279],{"type":24,"value":280},"下图展示了激活函数的梯度分布，采用FP16训练时，将会丢弃大部分有效信息。因此，训练过程中，以FP32和FP16/BF16为主；在推理任务中不需要对模型的权重进行更新，可以采用更低的计算精度，得到无损、微损的计算结果。",{"type":18,"tag":26,"props":282,"children":283},{},[284],{"type":18,"tag":137,"props":285,"children":288},{"alt":286,"src":287},"8.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/092630sizaics0plnzfbef.jpg",[],{"type":18,"tag":26,"props":290,"children":291},{},[292,293],{"type":24,"value":257},{"type":18,"tag":37,"props":294,"children":297},{"href":295,"rel":296},"https://link.zhihu.com/?target=https://arxiv.org/pdf/1710.03740.pdf",[41],[298],{"type":24,"value":299},"https://arxiv.org/pdf/1710.03740.pdf",{"type":18,"tag":74,"props":301,"children":303},{"id":302},"_324-算子优化",[304],{"type":24,"value":305},"3.2.4 算子优化",{"type":18,"tag":26,"props":307,"children":308},{},[309,311,318,320,327],{"type":24,"value":310},"Batch Size不同，导致算子对硬件资源利用率也会有差异，此时需要考虑算子调度策略和Tuning参数，有兴趣的同学可以参考：Rammer: Enabling Holistic Deep Learning Compiler Optimizations with rTasks (",{"type":18,"tag":37,"props":312,"children":315},{"href":313,"rel":314},"https://link.zhihu.com/?target=https://www.usenix.org/conference/osdi20/presentation/ma",[41],[316],{"type":24,"value":317},"https://www.usenix.org/conference/osdi20/presentation/ma",{"type":24,"value":319},"), IOS: INTER-OPERATOR SCHEDULER FOR CNN ACCELERATION (",{"type":18,"tag":37,"props":321,"children":324},{"href":322,"rel":323},"https://link.zhihu.com/?target=https://arxiv.org/pdf/2011.01302.pdf",[41],[325],{"type":24,"value":326},"https://arxiv.org/pdf/2011.01302.pdf",{"type":24,"value":328},")",{"type":18,"tag":74,"props":330,"children":332},{"id":331},"_325-其它",[333],{"type":24,"value":334},"3.2.5 
其它",{"type":18,"tag":26,"props":336,"children":337},{},[338],{"type":24,"value":339},"例如剪枝和稀疏化，由于篇幅所限，这里不便展开分析，有兴趣的同学可以查阅相关文档。",{"type":18,"tag":56,"props":341,"children":343},{"id":342},"_4-mindspore方案介绍",[344],{"type":24,"value":345},"4 MindSpore方案介绍",{"type":18,"tag":26,"props":347,"children":348},{},[349],{"type":24,"value":350},"TensorRT是一款高性能推理库，同时还可最大限度地降低延迟。TensorRT提供了Python和C++接口，允许开发者直接导入ONNX、Caffe、UFF格式的模型，同时也支持逐算子构建网络。TensorRT内置的图优化、内存管理、推理引擎，可以大幅提升推理性能。因此，目前业界主流框架，例如TensorFlow、PyTorch、MXNet、TVM等均已经集成了TensorRT。与此同时，TensorRT还提供了低精度推理、Int8量化校准、自定义插件、优化后模型导出等功能，进一步提升推理性能并扩大适用网络的范围。",{"type":18,"tag":26,"props":352,"children":353},{},[354],{"type":24,"value":355},"另一方面，TensorRT也存在一定的限制：TensorRT支持float、half、int、bool数据类型，暂时不支持double或者int64数据。此外，TensorRT目前内置了约80个算子，而MindSpore目前已支持近300个正向算子。因此MindSpore很多现有模型无法在TensorRT库进行推理。",{"type":18,"tag":26,"props":357,"children":358},{},[359],{"type":24,"value":360},"为了同时兼顾MindSpore算子多样性和TensorRT推理性能，MindSpore采用了自动子图拆分。具体实现可以分为四步：",{"type":18,"tag":362,"props":363,"children":364},"ul",{},[365],{"type":18,"tag":366,"props":367,"children":368},"li",{},[369],{"type":24,"value":370},"第一步：在后端图优化中，根据TensorRT的能力，将MindIR中的算子进行标注，并将连续的标注算子拆分成一张子图",{"type":18,"tag":26,"props":372,"children":373},{},[374],{"type":18,"tag":137,"props":375,"children":378},{"alt":376,"src":377},"9.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/092845fkmufy8lsczeewkl.jpg",[],{"type":18,"tag":26,"props":380,"children":381},{},[382],{"type":24,"value":383},"Conv2D, BN, ReLU节点首先被标注为“可以转换为TensorRT Network”，随后连续的算子被拆分为一张子图",{"type":18,"tag":26,"props":385,"children":386},{},[387],{"type":24,"value":388},"在这个过程中，需要避免子图拆分后，整个MindIR形成dead 
loop。",{"type":18,"tag":26,"props":390,"children":391},{},[392],{"type":18,"tag":137,"props":393,"children":396},{"alt":394,"src":395},"10.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/0929211w80ccqqbnxyhlp0.jpg",[],{"type":18,"tag":26,"props":398,"children":399},{},[400],{"type":24,"value":401},"上图中，如果简单地将蓝色算子拆分为一张子图，会导致形成dead loop，这种情况需要避免。",{"type":18,"tag":362,"props":403,"children":404},{},[405],{"type":18,"tag":366,"props":406,"children":407},{},[408],{"type":24,"value":409},"第二步：将MindIR子图传递给TensorRT，构建TensorRT Network",{"type":18,"tag":26,"props":411,"children":412},{},[413],{"type":18,"tag":137,"props":414,"children":417},{"alt":415,"src":416},"11.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/092946u6ksfzgnbygh45hy.jpg",[],{"type":18,"tag":26,"props":419,"children":420},{},[421],{"type":24,"value":422},"将MindIR子图转换为TensorRT的Network，部分算子可以简单映射，其它算子需要转换",{"type":18,"tag":362,"props":424,"children":425},{},[426],{"type":18,"tag":366,"props":427,"children":428},{},[429],{"type":24,"value":430},"第三步：将TensorRT Network进行序列化，使用序列化之后的数据构建TrtNode",{"type":18,"tag":26,"props":432,"children":433},{},[434],{"type":18,"tag":137,"props":435,"children":438},{"alt":436,"src":437},"12.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/093019rvsinv8qll4hghre.jpg",[],{"type":18,"tag":26,"props":440,"children":441},{},[442],{"type":24,"value":443},"TensorRT提供了Network的序列化功能。序列化数据中承载了网络结构、模型权重和Auto Tuning信息。",{"type":18,"tag":26,"props":445,"children":446},{},[447],{"type":24,"value":448},"序列化数据存储为TrtNode的Attribute",{"type":18,"tag":362,"props":450,"children":451},{},[452],{"type":18,"tag":366,"props":453,"children":454},{},[455],{"type":24,"value":456},"第四步：在模型推理时，TrtNode节点将被反序列化，并交由TensorRT的cudaEngine执行，剩余算子则由MindSpore原生后端执行",{"type":18,"tag":56,"props":458,"children":460},{"id":459},"_5-性能测试",[461],{"type":24,"value":462},"5 
性能测试",{"type":18,"tag":26,"props":464,"children":465},{},[466],{"type":24,"value":467},"为了对比优化后的推理性能，我们在model_zoo目录下选择了一些典型的网络，分别使用MindSpore 1.2.1和MindSpore 1.3.0测试了10组数据，统计推理请求平均执行时间，平均推理延时降低了2~10倍。详细的测试环境和测试过程参考附录。",{"type":18,"tag":26,"props":469,"children":470},{},[471],{"type":18,"tag":137,"props":472,"children":474},{"alt":7,"src":473},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/093124tambvt14r3vixlsg.png",[],{"type":18,"tag":56,"props":476,"children":478},{"id":477},"_6-附录",[479],{"type":24,"value":480},"6 附录",{"type":18,"tag":26,"props":482,"children":483},{},[484],{"type":24,"value":485},"1、测试环境软硬件配置如下：",{"type":18,"tag":26,"props":487,"children":488},{},[489],{"type":18,"tag":137,"props":490,"children":492},{"alt":7,"src":491},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/093154owxylv9wrahxs6ee.png",[],{"type":18,"tag":26,"props":494,"children":495},{},[496],{"type":24,"value":497},"2、测试过程如下：",{"type":18,"tag":26,"props":499,"children":500},{},[501,503],{"type":24,"value":502},"部署方法和过程参考官网教程《基于MindSpore 
Serving部署推理服务》，",{"type":18,"tag":37,"props":504,"children":507},{"href":505,"rel":506},"https://link.zhihu.com/?target=https://www.mindspore.cn/tutorial/inference/zh-CN/r1.2/serving_example.html",[41],[508],{"type":24,"value":509},"https://www.mindspore.cn/tutorial/inference/zh-CN/r1.2/serving_example.html",{"type":18,"tag":26,"props":511,"children":512},{},[513],{"type":24,"value":514},"这里以AlexNet为例，其它网络类似",{"type":18,"tag":26,"props":516,"children":517},{},[518],{"type":24,"value":519},"1）导出MindIR模型",{"type":18,"tag":26,"props":521,"children":522},{},[523],{"type":24,"value":524},"对export.py做少量修改：导出模型格式为“MindIR”，设备类型为“GPU”",{"type":18,"tag":26,"props":526,"children":527},{},[528],{"type":18,"tag":137,"props":529,"children":532},{"alt":530,"src":531},"21.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/093356gawxmj0kofpow5mi.jpg",[],{"type":18,"tag":26,"props":534,"children":535},{},[536],{"type":24,"value":537},"执行export.py文件，可以看到在当前目录下生成了alexnet.mindir模型",{"type":18,"tag":26,"props":539,"children":540},{},[541],{"type":18,"tag":137,"props":542,"children":545},{"alt":543,"src":544},"14.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/093434whctagu5db5wkxuj.jpg",[],{"type":18,"tag":26,"props":547,"children":548},{},[549],{"type":24,"value":550},"2）部署Serving",{"type":18,"tag":26,"props":552,"children":553},{},[554],{"type":18,"tag":137,"props":555,"children":558},{"alt":556,"src":557},"15.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/093458djxti14yz1ebepj9.jpg",[],{"type":18,"tag":26,"props":560,"children":561},{},[562],{"type":24,"value":563},"servable_config.py配置如下，这里开启fp16推理模式",{"type":18,"tag":26,"props":565,"children":566},{},[567],{"type":18,"tag":137,"props":568,"children":571},{"alt":569,"src":570},"16.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/093516x6061llevwr36g40.jpg",[],{"type":18,"tag":26,"props":573,"children":574},{},[575],{"type"
:24,"value":576},"3）启动Serving",{"type":18,"tag":26,"props":578,"children":579},{},[580],{"type":24,"value":581},"需要注意的是，这里我们需要将TensorRT的library路径加入到LD_LIBRARY_PATH中",{"type":18,"tag":26,"props":583,"children":584},{},[585],{"type":18,"tag":137,"props":586,"children":589},{"alt":587,"src":588},"17.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/093617u9gni3rwuxwdcdgv.jpg",[],{"type":18,"tag":26,"props":591,"children":592},{},[593],{"type":24,"value":594},"4）重新打开一个terminal，发起推理请求",{"type":18,"tag":26,"props":596,"children":597},{},[598],{"type":18,"tag":137,"props":599,"children":602},{"alt":600,"src":601},"18.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/093646xmrw0mr7bfopwwtv.jpg",[],{"type":18,"tag":26,"props":604,"children":605},{},[606],{"type":24,"value":607},"5）考虑到warm up带来的抖动，我们跳过前两次请求，仅统计后续10次的请求响应时间",{"type":18,"tag":26,"props":609,"children":610},{},[611],{"type":24,"value":612},"Lenet",{"type":18,"tag":26,"props":614,"children":615},{},[616],{"type":18,"tag":137,"props":617,"children":619},{"alt":7,"src":618},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/093800rzq1ysxllxu4ze2e.png",[],{"type":18,"tag":26,"props":621,"children":622},{},[623],{"type":24,"value":624},"AlexNet",{"type":18,"tag":26,"props":626,"children":627},{},[628],{"type":18,"tag":137,"props":629,"children":631},{"alt":7,"src":630},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/094447s2gaf8ksycr5rlgu.png",[],{"type":18,"tag":26,"props":633,"children":634},{},[635],{"type":24,"value":636},"resnet50",{"type":18,"tag":26,"props":638,"children":639},{},[640],{"type":18,"tag":137,"props":641,"children":643},{"alt":7,"src":642},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/0945250ltxqdinycbwod1p.png",[],{"type":18,"tag":26,"props":645,"children":646},{},[647],{"type":24,"value":648},"vgg",{"type":18,"tag":26,"props":650,"children":651},{},[652],{"type":18,"tag":
137,"props":653,"children":655},{"alt":7,"src":654},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/094608skxfqbi1fndz6qxy.png",[],{"type":18,"tag":26,"props":657,"children":658},{},[659],{"type":24,"value":660},"InceptionV3",{"type":18,"tag":26,"props":662,"children":663},{},[664],{"type":18,"tag":137,"props":665,"children":667},{"alt":7,"src":666},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/094639ih3c6puj4yai5irn.png",[],{"type":18,"tag":26,"props":669,"children":670},{},[671],{"type":24,"value":672},"mobilenetv2",{"type":18,"tag":26,"props":674,"children":675},{},[676],{"type":18,"tag":137,"props":677,"children":679},{"alt":7,"src":678},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/094709disrl8pnetkrj3k8.png",[],{"type":18,"tag":26,"props":681,"children":682},{},[683],{"type":24,"value":684},"Mobilenetv3",{"type":18,"tag":26,"props":686,"children":687},{},[688],{"type":18,"tag":137,"props":689,"children":691},{"alt":7,"src":690},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/0947502w7xhx3ljao5q3dq.png",[],{"type":18,"tag":26,"props":693,"children":694},{},[695],{"type":24,"value":696},"Bert",{"type":18,"tag":26,"props":698,"children":699},{},[700],{"type":18,"tag":137,"props":701,"children":703},{"alt":7,"src":702},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/094817ywiflhb2zzxzhjxi.png",[],{"type":18,"tag":26,"props":705,"children":706},{},[707],{"type":24,"value":708},"Wide and 
Deep",{"type":18,"tag":26,"props":710,"children":711},{},[712],{"type":18,"tag":137,"props":713,"children":715},{"alt":7,"src":714},"https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/04/094846von7qy034kdcpuvf.png",[],{"title":7,"searchDepth":717,"depth":717,"links":718},4,[719,721,726,735,736,737],{"id":58,"depth":720,"text":61},2,{"id":69,"depth":720,"text":72,"children":722},[723,725],{"id":76,"depth":724,"text":83},3,{"id":91,"depth":724,"text":97},{"id":115,"depth":720,"text":118,"children":727},[728,729,730,731,732,733,734],{"id":121,"depth":724,"text":127},{"id":144,"depth":724,"text":150},{"id":158,"depth":724,"text":161},{"id":223,"depth":724,"text":226},{"id":267,"depth":724,"text":270},{"id":302,"depth":724,"text":305},{"id":331,"depth":724,"text":334},{"id":342,"depth":720,"text":345},{"id":459,"depth":720,"text":462},{"id":477,"depth":720,"text":480},"markdown","content:technology-blogs:zh:671.md","content","technology-blogs/zh/671.md","technology-blogs/zh/671","md",1776506139559]