[{"data":1,"prerenderedAt":459},["ShallowReactive",2],{"content-query-eBTdbjiNM3":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":453,"_id":454,"_source":455,"_file":456,"_stem":457,"_extension":458},"/technology-blogs/zh/674","zh",false,"","自此告别互信息：用于跨模态行人重识别的变分蒸馏技术","MindSpore优秀论文解读系列","2021-08-05","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2021/08/05/897ee155c8be453c91d39db05ca831e4.png","technology-blogs","大V博文",{"type":15,"children":16,"toc":443},"root",[17,25,31,44,55,92,97,106,130,140,158,165,170,175,181,186,196,201,211,217,222,231,236,242,250,255,260,268,273,279,284,302,307,315,320,328,333,338,346,351,356,361,369,375,380,385,390,402,414,419,431],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"自此告别互信息用于跨模态行人重识别的变分蒸馏技术",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":28},"p",{},[29],{"type":24,"value":30},"作者：李锐锋",{"type":18,"tag":26,"props":32,"children":33},{},[34,36],{"type":24,"value":35},"作者主页：",{"type":18,"tag":37,"props":38,"children":42},"a",{"href":39,"rel":40},"https://www.zhihu.com/people/risenberg",[41],"nofollow",[43],{"type":24,"value":39},{"type":18,"tag":26,"props":45,"children":46},{},[47,49],{"type":24,"value":48},"文章来源：",{"type":18,"tag":37,"props":50,"children":53},{"href":51,"rel":52},"https://zhuanlan.zhihu.com/p/396288749",[41],[54],{"type":24,"value":51},{"type":18,"tag":26,"props":56,"children":57},{},[58,65,72,74,81,83,90],{"type":18,"tag":37,"props":59,"children":62},{"href":60,"rel":61},"https://link.zhihu.com/?target=http://mindspore.cn/",[41],[63],{"type":24,"value":64},"MindSpore",{"type":18,"tag":37,"props":66,"children":69},{"href":67,"rel":68},"https://link.zhihu.com/?target=https://gitee.com/mindspore/",[41],[70],{"type":24,"value":71},"作为一个端边云协同的全场景AI开源框架",{"type":24,"value":73},"，为开发者带来编程更简单、调试更轻松、性能更卓越、部署更灵活、使用更安全的体验，2020.3.28开源以来已获得五十万以上的下载量，走入100+高校
教学，拥有数量众多的开发者，在AI计算中心，云、CT、消费者1+8+N等端边云全场景逐步广泛应用，是Gitee指数最高的开源软件。欢迎大家参与开源贡献、",{"type":18,"tag":37,"props":75,"children":78},{"href":76,"rel":77},"https://link.zhihu.com/?target=https://mp.weixin.qq.com/s/RYEEq9Wt6DjSmJ2Xt-KfGQ",[41],[79],{"type":24,"value":80},"模型众智合作",{"type":24,"value":82},"、行业创新与应用、算法创新、学术合作、AI书籍合作等，贡献您在云侧、",{"type":18,"tag":37,"props":84,"children":87},{"href":85,"rel":86},"https://link.zhihu.com/?target=https://www.mindspore.cn/lite",[41],[88],{"type":24,"value":89},"端侧",{"type":24,"value":91},"、边侧以及安全领域的应用案例。",{"type":18,"tag":26,"props":93,"children":94},{},[95],{"type":24,"value":96},"基于MindSpore的AI顶会论文越来越多，MindSpore社区会不定期挑选一些优秀的论文来推送和解读，希望更多的学术界专家跟MindSpore合作，一起推动原创AI研究，MindSpore社区会持续支撑好AI原创和AI应用，本次我们选择了来自国内高校在CVPR2021的一篇论文进行解读，感谢谢老师团队投稿。",{"type":18,"tag":26,"props":98,"children":99},{},[100],{"type":18,"tag":101,"props":102,"children":103},"strong",{},[104],{"type":24,"value":105},"1.研究背景",{"type":18,"tag":26,"props":107,"children":108},{},[109,114,116,121,123,128],{"type":18,"tag":101,"props":110,"children":111},{},[112],{"type":24,"value":113},"研究背景介绍",{"type":24,"value":115},"：信息瓶颈（",{"type":18,"tag":101,"props":117,"children":118},{},[119],{"type":24,"value":120},"I",{"type":24,"value":122},"nformation ",{"type":18,"tag":101,"props":124,"children":125},{},[126],{"type":24,"value":127},"B",{"type":24,"value":129},"ottleneck, IB）是一种信息论指导下的表征学习方法，在本世纪初由Naftali 
Tishby教授等人提出，其核心目标为提纯对任务有帮助的判别性信息，同时消除输入中包含的冗余信息。虽然其理念十分先进，但实践中存在诸多缺陷，严重限制了其有效性和应用价值。",{"type":18,"tag":26,"props":131,"children":132},{},[133,138],{"type":18,"tag":101,"props":134,"children":135},{},[136],{"type":24,"value":137},"论文研究方向",{"type":24,"value":139},"：此次工作围绕信息瓶颈的三个主要痛点展开，重点解决了高维空间中互信息难以估计、信息瓶颈优化机制中“充分性-简洁性”的权衡、以及对多视图数据乏力等难题。",{"type":18,"tag":26,"props":141,"children":142},{},[143,148,150,156],{"type":18,"tag":101,"props":144,"children":145},{},[146],{"type":24,"value":147},"团队背景介绍",{"type":24,"value":149},"：所在团队由吴文俊科学技术奖自然科学奖、上海市科技进步特等奖获得者谢源教授领衔。团队长期从事机器学习、计算机视觉与模式识别等方面的科研工作,有扎实的研究基础和丰富的成果积累（AI与CV顶会年均产出4",{"type":18,"tag":151,"props":152,"children":153},"del",{},[154],{"type":24,"value":155},"8篇，AI与CV顶刊年均产出3",{"type":24,"value":157},"5篇），并形成了一系列自有知识产权的国际领先的科研成果。",{"type":18,"tag":159,"props":160,"children":162},"h2",{"id":161},"_2论文主要内容简介",[163],{"type":24,"value":164},"2.论文主要内容简介",{"type":18,"tag":26,"props":166,"children":167},{},[168],{"type":24,"value":169},"论文针对信息瓶颈的优化提出了一种全新、可扩展的解析解，在避免高维空间互信息估算的前提下，对其进行更为精确的拟合。在此基础上，论文将信息瓶颈优化目标等效地简化为一项KL-散度，使其可以在保存判别性信息的同时消除冗余，彻底解决了原有优化机制中“充分性-简洁性”的权衡难题，显著提升了信息瓶颈的实践价值。为提升应对多视图问题的能力，文中通过对涉及多视图变量的互信息进行数次交叉分解，提出一种有效保存视图一致性的方法。在其作用下，表征可以显著提升自身对于视图变化的鲁棒性，从而更好地保存泛化性信息。",{"type":18,"tag":26,"props":171,"children":172},{},[173],{"type":24,"value":174},"论文通过大量、详实的消融实验在实践中验证了理论的正确性，并在与SOTA方法的对比中，以极简的网络结构大幅领先于所有相关方法，证实了理论的有效性。",{"type":18,"tag":159,"props":176,"children":178},{"id":177},"_3代码链接",[179],{"type":24,"value":180},"3.代码链接",{"type":18,"tag":26,"props":182,"children":183},{},[184],{"type":24,"value":185},"论文链接：",{"type":18,"tag":26,"props":187,"children":188},{},[189],{"type":18,"tag":37,"props":190,"children":193},{"href":191,"rel":192},"https://link.zhihu.com/?target=https://arxiv.org/abs/2104.02862",[41],[194],{"type":24,"value":195},"https://arxiv.org/abs/2104.02862",{"type":18,"tag":26,"props":197,"children":198},{},[199],{"type":24,"value":200},"基于MindSpore实现的代码开源链接：",{"type":18,"tag":26,"props":202,"childre
n":203},{},[204],{"type":18,"tag":37,"props":205,"children":208},{"href":206,"rel":207},"https://link.zhihu.com/?target=https://gitee.com/mindspore/contrib/tree/master/papers/MVD",[41],[209],{"type":24,"value":210},"https://gitee.com/mindspore/contrib/tree/master/papers/MVD",{"type":18,"tag":159,"props":212,"children":214},{"id":213},"_4算法框架技术要点",[215],{"type":24,"value":216},"4.算法框架技术要点",{"type":18,"tag":26,"props":218,"children":219},{},[220],{"type":24,"value":221},"论文中所有实验均以标准评测准则开展于SYSU-MM01与RegDB数据集。如图一所示，算法框架（模型）总共包括两部分，即两条用于处理单一模态的modal-specific分支，以及一条同时处理不同模态数据的modal-shared分支。每条分支仅包括一个编码器以及一个信息瓶颈，两者分别采用ResNet-50以及一个三层MLP实现。",{"type":18,"tag":26,"props":223,"children":224},{},[225],{"type":18,"tag":226,"props":227,"children":230},"img",{"alt":228,"src":229},"1.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/05/144623mscy1r9swcnpj26k.jpg",[],{"type":18,"tag":26,"props":232,"children":233},{},[234],{"type":24,"value":235},"图一：算法框架（模型结构图）",{"type":18,"tag":159,"props":237,"children":239},{"id":238},"_5实验结果",[240],{"type":24,"value":241},"5.实验结果",{"type":18,"tag":26,"props":243,"children":244},{},[245],{"type":18,"tag":226,"props":246,"children":249},{"alt":247,"src":248},"2.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/05/144722s9jsk2jtev3d1wt8.jpg",[],{"type":18,"tag":26,"props":251,"children":252},{},[253],{"type":24,"value":254},"论文模型在SYSU-MM01数据集以及RegDB数据集上的结果分别如下图所示：",{"type":18,"tag":26,"props":256,"children":257},{},[258],{"type":24,"value":259},"图二：论文模型在SYSU-MM01数据集实验结果",{"type":18,"tag":26,"props":261,"children":262},{},[263],{"type":18,"tag":226,"props":264,"children":267},{"alt":265,"src":266},"3.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/05/144846vnlu7tr1dxijkvum.jpg",[],{"type":18,"tag":26,"props":269,"children":270},{},[271],{"type":24,"value":272},"图三：论文模型在RegDB数据集实验结果",{"type":18,"tag":159,"props":274,"children":276},{"id":275},"_6mindspore代码实现",[277],{"type":24,
"value":278},"6.MindSpore代码实现",{"type":18,"tag":26,"props":280,"children":281},{},[282],{"type":24,"value":283},"代码主要包括以下几个模块构成：数据加载、训练器以及模型框架。",{"type":18,"tag":285,"props":286,"children":287},"ol",{},[288,294],{"type":18,"tag":289,"props":290,"children":291},"li",{},[292],{"type":24,"value":293},"数据加载：",{"type":18,"tag":289,"props":295,"children":296},{},[297],{"type":18,"tag":226,"props":298,"children":301},{"alt":299,"src":300},"4.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/05/144952rapt1buozinyit09.jpg",[],{"type":18,"tag":26,"props":303,"children":304},{},[305],{"type":24,"value":306},"图四：数据加载（SYSU-MM01 query）",{"type":18,"tag":26,"props":308,"children":309},{},[310],{"type":18,"tag":226,"props":311,"children":314},{"alt":312,"src":313},"5.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/05/145040oe8wvgqnpllgolhd.jpg",[],{"type":18,"tag":26,"props":316,"children":317},{},[318],{"type":24,"value":319},"图五：数据加载（SYSU-MM01 gallery）",{"type":18,"tag":26,"props":321,"children":322},{},[323],{"type":18,"tag":226,"props":324,"children":327},{"alt":325,"src":326},"6.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/05/1450551h9kmu0xaqsyweml.jpg",[],{"type":18,"tag":26,"props":329,"children":330},{},[331],{"type":24,"value":332},"训练器：",{"type":18,"tag":26,"props":334,"children":335},{},[336],{"type":24,"value":337},"图六：训练循环",{"type":18,"tag":26,"props":339,"children":340},{},[341],{"type":18,"tag":226,"props":342,"children":345},{"alt":343,"src":344},"7.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/05/145432abydhcalzq1qgz5h.jpg",[],{"type":18,"tag":26,"props":347,"children":348},{},[349],{"type":24,"value":350},"模型框架：",{"type":18,"tag":26,"props":352,"children":353},{},[354],{"type":24,"value":355},"图七：模型总体框架",{"type":18,"tag":26,"props":357,"children":358},{},[359],{"type":24,"value":360},"图八：信息瓶颈结构",{"type":18,"tag":26,"props":362,"children":363},{},[364],{"type"
:18,"tag":226,"props":365,"children":368},{"alt":366,"src":367},"8.jpg","https://bbs-img.huaweicloud.com/data/forums/attachment/forum/202108/05/145200kse3uze0m90kukcm.jpg",[],{"type":18,"tag":159,"props":370,"children":372},{"id":371},"_7总结与展望",[373],{"type":24,"value":374},"7.总结与展望",{"type":18,"tag":26,"props":376,"children":377},{},[378],{"type":24,"value":379},"论文为互信息拟合提供了一种全新、可扩展的解析解，并以此改进了信息瓶颈的优化机制。虽然方法可以有效解决多视图学习问题，但复杂度会随着视图数量的增长而急剧上升。因此未来工作可以改进方法对于多视图数据的处理，使其在保证有效性的同时，不再受视图数量的限制。",{"type":18,"tag":26,"props":381,"children":382},{},[383],{"type":24,"value":384},"本文作者在MindSpore社区从事相关AI工作，欢迎您私信加入社区或者微信QQ群，一起用MindSpore赋能千行百业，点亮您的智慧生活。",{"type":18,"tag":26,"props":386,"children":387},{},[388],{"type":24,"value":389},"官方QQ群: 871543426",{"type":18,"tag":26,"props":391,"children":392},{},[393,395],{"type":24,"value":394},"MindSpore官网：",{"type":18,"tag":37,"props":396,"children":399},{"href":397,"rel":398},"https://link.zhihu.com/?target=https://www.mindspore.cn/",[41],[400],{"type":24,"value":401},"https://www.mindspore.cn/",{"type":18,"tag":26,"props":403,"children":404},{},[405,407],{"type":24,"value":406},"MindSpore论坛：",{"type":18,"tag":37,"props":408,"children":411},{"href":409,"rel":410},"https://link.zhihu.com/?target=https://bbs.huaweicloud.com/forum/forum-1076-1.html",[41],[412],{"type":24,"value":413},"https://bbs.huaweicloud.com/forum/forum-1076-1.html",{"type":18,"tag":26,"props":415,"children":416},{},[417],{"type":24,"value":418},"代码仓地址：",{"type":18,"tag":26,"props":420,"children":421},{},[422,424],{"type":24,"value":423},"Gitee-",{"type":18,"tag":37,"props":425,"children":428},{"href":426,"rel":427},"https://link.zhihu.com/?target=https://gitee.com/mindspore/mindspore.git",[41],[429],{"type":24,"value":430},"https://gitee.com/mindspore/mindspore.git",{"type":18,"tag":26,"props":432,"children":433},{},[434,436],{"type":24,"value":435},"GitHub-",{"type":18,"tag":37,"props":437,"children":440},{"href":438,"rel":439},"https://link.zhihu.com/?target=https://
github.com/mindspore-ai/mindspore.git",[41],[441],{"type":24,"value":442},"https://github.com/mindspore-ai/mindspore.git",{"title":7,"searchDepth":444,"depth":444,"links":445},4,[446,448,449,450,451,452],{"id":161,"depth":447,"text":164},2,{"id":177,"depth":447,"text":180},{"id":213,"depth":447,"text":216},{"id":238,"depth":447,"text":241},{"id":275,"depth":447,"text":278},{"id":371,"depth":447,"text":374},"markdown","content:technology-blogs:zh:674.md","content","technology-blogs/zh/674.md","technology-blogs/zh/674","md",1776506139601]