[{"data":1,"prerenderedAt":319},["ShallowReactive",2],{"content-query-7FYbG1hK1r":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":313,"_id":314,"_source":315,"_file":316,"_stem":317,"_extension":318},"/technology-blogs/zh/3026","zh",false,"","华为诺亚方舟实验室提出GhostNetV2，端侧小模型性能新SOTA","作者：王云鹤 ｜来源：知乎","2024-03-13","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/15/b933dc49c44a493fbf3dc1548826d153.png","technology-blogs",{"type":14,"children":15,"toc":308},"root",[16,24,43,51,56,61,72,77,86,94,99,104,112,117],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"华为诺亚方舟实验室提出ghostnetv2端侧小模型性能新sota",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28,30,36,38],{"type":23,"value":29},"**作者：**",{"type":17,"tag":31,"props":32,"children":33},"strong",{},[34],{"type":23,"value":35},"王云鹤",{"type":23,"value":37}," ｜",{"type":17,"tag":31,"props":39,"children":40},{},[41],{"type":23,"value":42},"来源：知乎",{"type":17,"tag":25,"props":44,"children":45},{},[46],{"type":17,"tag":47,"props":48,"children":50},"img",{"alt":7,"src":49},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/15/3a7e8efd2c7e4243955a221e8268ef64.png",[],{"type":17,"tag":25,"props":52,"children":53},{},[54],{"type":23,"value":55},"图：GhostNetv2与现有轻量化模型比较（ImageNet）",{"type":17,"tag":25,"props":57,"children":58},{},[59],{"type":23,"value":60},"论文见：",{"type":17,"tag":25,"props":62,"children":63},{},[64],{"type":17,"tag":65,"props":66,"children":70},"a",{"href":67,"rel":68},"https://arxiv.org/abs/2211.12905",[69],"nofollow",[71],{"type":23,"value":67},{"type":17,"tag":25,"props":73,"children":74},{},[75],{"type":23,"value":76},"MindSpore代码：",{"type":17,"tag":25,"props":78,"children":79},{},[80],{"type":17,"tag":65,"props":81,"children":84},{"href":82,"rel":83},"https://gitee.com/mindspore/models/tree/master/research/cv/ghostnetv2",[69],[85],{"type":23,"value":82},{"type":17,"tag":25,"props":87,"children":88},{},[89],{"type":17,"tag":31,"props":90,"children":91},{},[92],{"type":23,"value":93},"引言",{"type":17,"tag":25,"props":95,"children":96},{},[97],{"type":23,"value":98},"智能手机等边缘设备计算资源有限，在设计模型时，不仅需要考虑模型的性能，更要考虑其实际的推理速度。计算机视觉领域爆火的Transformer模型在多个任务取得了很高精度，但在端侧设备上运行速度较慢，难以满足实时性的要求。经典的自注意力（self-attention）模块的计算复杂度较高，计算量随着输入分辨率的增加以二次方的速度增长。",{"type":17,"tag":25,"props":100,"children":101},{},[102],{"type":23,"value":103},"尽管目前主流的局部注意力模块（将图像切分为多个区域，在每个区域内分别部署注意力模块）降低了理论复杂度，但图像切分需要大量变换张量形状的操作（比如reshape、transpose等），在端侧设备上会产生很高的时延。比如，将局部注意力模块和轻量化模型GhostNet结合，理论复杂度只增加了20%，但是实际的推理时延却翻了2倍。因此，为轻量化小模型专门设计硬件友好的注意力机制非常有必要。",{"type":17,"tag":25,"props":105,"children":106},{},[107],{"type":17,"tag":31,"props":108,"children":109},{},[110],{"type":23,"value":111},"DFC attention: 基于解耦全连接层的注意力模块",{"type":17,"tag":25,"props":113,"children":114},{},[115],{"type":23,"value":116},"一个适用于端侧小模型的注意力模块应当满足3个条件：",{"type":17,"tag":118,"props":119,"children":120},"ul",{},[121,127,132,137,144,155,162,177,184,212,219,224,231,241,248,275,282,289,294,301],{"type":17,"tag":122,"props":123,"children":124},"li",{},[125],{"type":23,"value":126},"对长距离空间信息的建模能力强。相比CNN，Transformer性能强大的一个重要原因是它能够建模全局空间信息，因此新的注意力模块也应当能捕捉空间长距离信息。",{"type":17,"tag":122,"props":128,"children":129},{},[130],{"type":23,"value":131},"部署高效。注意力模块应该硬件友好，计算高效，以免拖慢推理速度，特别是不应包含硬件不友好的操作。",{"type":17,"tag":122,"props":133,"children":134},{},[135],{"type":23,"value":136},"概念简单。为了保证注意力模块的泛化能力，这个模块的设计应当越简单越好。",{"type":17,"tag":122,"props":138,"children":139},{},[140],{"type":17,"tag":47,"props":141,"children":143},{"alt":7,"src":142},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/15/30aa9a8991ed40ae9a5043b00254ca51.png",[],{"type":17,"tag":122,"props":145,"children":146},{},[147,149,153],{"type":23,"value":148},"图：DFC attention。分别沿着垂直、水平两个方向捕捉长距离信息。",{"type":17,"tag":150,"props":151,"children":152},"br",{},[],{"type":23,"value":154},"相比自注意力机制，具有固定权重的全连接 (FC) 层更简单、更容易实现，也可用于生成具有全局感受野的注意力图。详细的计算过程如下所示。",{"type":17,"tag":122,"props":156,"children":157},{},[158],{"type":17,"tag":47,"props":159,"children":161},{"alt":7,"src":160},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/15/860fc87543174e88a64fe2cbf7302673.png",[],{"type":17,"tag":122,"props":163,"children":164},{},[165,167,170,172,175],{"type":23,"value":166},"公式（1）",{"type":17,"tag":150,"props":168,"children":169},{},[],{"type":23,"value":171},"在（1）中，生成的特征图a和输入特征的每一个点都直接相关，能够捕捉到全局感受野。但由于输入输出特征上任意两个点都是直接连接的，计算量也很大。",{"type":17,"tag":150,"props":173,"children":174},{},[],{"type":23,"value":176},"本文把全连接层沿着水平和垂直两个方向解耦，分别使用两个全连接层来聚合这两个方向的长距离信息，就可以大幅降低计算复杂度，这样的注意力模块被称为 decoupled fully connected (DFC) attention。",{"type":17,"tag":122,"props":178,"children":179},{},[180],{"type":17,"tag":47,"props":181,"children":183},{"alt":7,"src":182},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/15/14782860727e4b5eb8c7be7117e78115.png",[],{"type":17,"tag":122,"props":185,"children":186},{},[187,189,192,194,197,202,205,207,210],{"type":23,"value":188},"公式（2）",{"type":17,"tag":150,"props":190,"children":191},{},[],{"type":23,"value":193},"上式表示了 DFC attention的一般形式，分别沿水平和垂直方向聚合特征。通过共享部分变换权重，可以方便地用卷积实现，从而省去十分耗时的reshape、transpose等操作。为了处理具有不同分辨率的输入图像，卷积核的大小也可以与特征图的大小解耦，即使用大小为1×K_H 和 K_W×1 的两个depthwise卷积作用到于输入特征上。这种策略被TFLite 和 ONNX 等部署工具支持得很好，可以在移动设备上进行快速推理。",{"type":17,"tag":150,"props":195,"children":196},{},[],{"type":17,"tag":31,"props":198,"children":199},{},[200],{"type":23,"value":201},"GhostNetV2:",{"type":17,"tag":150,"props":203,"children":204},{},[],{"type":23,"value":206},"将DFC attention插入到轻量化网络GhostNet中可以提升表征能力，从而构建出新型视觉骨干网络 GhostNetV2。",{"type":17,"tag":150,"props":208,"children":209},{},[],{"type":23,"value":211},"为了弥补Ghost 模块建模空间依赖能力的不足,本文将DFC attention和Ghost 模块并联。即：",{"type":17,"tag":122,"props":213,"children":214},{},[215],{"type":17,"tag":47,"props":216,"children":218},{"alt":7,"src":217},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/15/55b6c16d25de479b920ee235865f7897.png",[],{"type":17,"tag":122,"props":220,"children":221},{},[222],{"type":23,"value":223},"这里Sigmoid是归一化函数，A是attention map, V（）表示Ghost模块，X是输入数据。如下图所示，两路特征相乘得到最终的输出。",{"type":17,"tag":122,"props":225,"children":226},{},[227],{"type":17,"tag":47,"props":228,"children":230},{"alt":7,"src":229},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/15/0d235d1074b640ea8fb4d64e2f612303.png",[],{"type":17,"tag":122,"props":232,"children":233},{},[234,236,239],{"type":23,"value":235},"图：DFC attention与Ghost模块并联",{"type":17,"tag":150,"props":237,"children":238},{},[],{"type":23,"value":240},"为了减小DFC attention模块所消耗的计算量，本文对DFC这条支路上的特征进行下采样，在更小的特征图上执行一系列变换。同时，本文发现，对一个逆bottleneck结构而言，增强“expressiveness”（bottleneck中间层）比“capacity”（bottleneck输出层）更加有效，因此在GhostNetV2只对中间特征做了增强。GhostNetV2的bottleneck如下图所示。",{"type":17,"tag":122,"props":242,"children":243},{},[244],{"type":17,"tag":47,"props":245,"children":247},{"alt":7,"src":246},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/15/35018e64c4a24dcfab75c54912bb1435.png",[],{"type":17,"tag":122,"props":249,"children":250},{},[251,253,265,268,270,273],{"type":23,"value":252},"图：GhostNetV1和GhostNetV2",{"type":17,"tag":254,"props":255,"children":257},"h3",{"id":256},"实验结果",[258],{"type":17,"tag":31,"props":259,"children":260},{},[261],{"type":17,"tag":31,"props":262,"children":263},{},[264],{"type":23,"value":256},{"type":17,"tag":150,"props":266,"children":267},{},[],{"type":23,"value":269},"本文在ImageNet图像分类、COCO目标检测、ADE语义分割等数据集上进行了实验。相比其他架构，GhostNetV2取得了更快的推理速度和更高的精度。",{"type":17,"tag":150,"props":271,"children":272},{},[],{"type":23,"value":274},"下面是ImageNet的实验结果，无论是理论计算量、还是端侧设备上的实测速度，GhostNetV2优于现有方法。",{"type":17,"tag":122,"props":276,"children":277},{},[278],{"type":17,"tag":47,"props":279,"children":281},{"alt":7,"src":280},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/15/f2de589e4020407399d1c603c0e0b991.png",[],{"type":17,"tag":122,"props":283,"children":284},{},[285],{"type":17,"tag":47,"props":286,"children":288},{"alt":7,"src":287},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/15/58a7cfaa3cdd493394ce24eaace1ee13.png",[],{"type":17,"tag":122,"props":290,"children":291},{},[292],{"type":23,"value":293},"GhostNetV2也可以作为骨干模型，用于目标检测、语义分割等下游任务。结果如下：",{"type":17,"tag":122,"props":295,"children":296},{},[297],{"type":17,"tag":47,"props":298,"children":300},{"alt":7,"src":299},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/15/64d8a976228f43ceb53e3753d5eabdfe.png",[],{"type":17,"tag":122,"props":302,"children":303},{},[304],{"type":17,"tag":47,"props":305,"children":307},{"alt":7,"src":306},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/15/0bd42a2e038e47c7a8381714f53fd0b7.png",[],{"title":7,"searchDepth":309,"depth":309,"links":310},4,[311],{"id":256,"depth":312,"text":256},3,"markdown","content:technology-blogs:zh:3026.md","content","technology-blogs/zh/3026.md","technology-blogs/zh/3026","md",1776506125466]