[{"data":1,"prerenderedAt":470},["ShallowReactive",2],{"content-query-3QyI0zi8u0":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":464,"_id":465,"_source":466,"_file":467,"_stem":468,"_extension":469},"/technology-blogs/zh/1903","zh",false,"","联邦学习 | 如何在不泄露本地数据的情况下都获得一个有效的模型？","联邦学习：每个参与者拥有自己的数据，并且不希望把自己的数据泄露出去，但他们又想共同训练一个好用的模型。","2022-10-10","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2022/10/24/60e491a548cd4d3dac2b80024e18308b.png","technology-blogs",{"type":14,"children":15,"toc":459},"root",[16,24,33,56,61,73,80,85,104,120,127,132,137,144,148,162,169,174,188,195,200,215,225,233,241,251,259,276,286,294,302,313,321,330,357,367,378,391,409,422,441],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"联邦学习-如何在不泄露本地数据的情况下都获得一个有效的模型",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28],{"type":17,"tag":29,"props":30,"children":32},"img",{"alt":7,"src":31},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2022/10/24/18f38fa46b2745af9970ec2c0ecaf2b7.gif",[],{"type":17,"tag":25,"props":34,"children":35},{},[36,42,44,49,51],{"type":17,"tag":37,"props":38,"children":39},"strong",{},[40],{"type":23,"value":41},"作者",{"type":23,"value":43},"：",{"type":17,"tag":37,"props":45,"children":46},{},[47],{"type":23,"value":48},"AI安全Mr.Jin",{"type":23,"value":50}," ｜",{"type":17,"tag":37,"props":52,"children":53},{},[54],{"type":23,"value":55},"来源：知乎",{"type":17,"tag":25,"props":57,"children":58},{},[59],{"type":23,"value":60},"著名杂志《经济学人》刊登过一篇封面文章，将数据比作“新世纪的石油”。毫无疑问，在数字经济时代，能否充分挖掘、使用数据，决定了企业的命运。不论是传统的机器学习，还是当今迅猛发展的人工智能，其核心都是数据驱动。",{"type":17,"tag":25,"props":62,"children":63},{},[64,66,71],{"type":23,"value":65},"数据固然是好东西，但很多有价值的数据都涉及个人隐私或者保密协议。早在2018年，欧盟就正式施行了通用数据保护条例GDPR（General Data Protection 
Regulation）。在2021年7月份，亚马逊就因为违反GDPR条例被罚款",{"type":17,"tag":37,"props":67,"children":68},{},[69],{"type":23,"value":70},"7.46亿欧元",{"type":23,"value":72},"。除了国外，我国对数据的保护也日益重视，2021年8月20日，第十三届全国人民代表大会常务委员会第三十次会议通过《中华人民共和国个人信息保护法》，这标志着个人数据的保护已经上升到了法律的层面。",{"type":17,"tag":25,"props":74,"children":75},{},[76],{"type":17,"tag":29,"props":77,"children":79},{"alt":7,"src":78},"https://pic1.zhimg.com/80/v2-6d2f325abda183dcece9e7fb42e542ac_720w.webp",[],{"type":17,"tag":25,"props":81,"children":82},{},[83],{"type":23,"value":84},"《中华人民共和国个人信息保护法》部分",{"type":17,"tag":25,"props":86,"children":87},{},[88,90,95,97,102],{"type":23,"value":89},"在数据保护条例的约束下，数据流通必然受到限制，从而形成“",{"type":17,"tag":37,"props":91,"children":92},{},[93],{"type":23,"value":94},"数据孤岛",{"type":23,"value":96},"”现象：许多个人/企业有自己收集的数据，单靠他们本身持有的数据不足以完成一个数据挖掘任务，但他们又不想把自身的数据分享给其他个体。为了解决数据孤岛难题，学术界和工业界的先驱们提出了多种隐私计算方法，其中就包括",{"type":17,"tag":37,"props":98,"children":99},{},[100],{"type":23,"value":101},"联邦学习",{"type":23,"value":103},"。",{"type":17,"tag":25,"props":105,"children":106},{},[107,109,118],{"type":23,"value":108},"联邦学习的英文是Federated learning，咱先从字面意思猜测一下这是个什么技术。“学习（learning）”，指的就是机器学习了（machine 
learning），一般是利用一批数据去",{"type":17,"tag":110,"props":111,"children":115},"a",{"href":112,"rel":113},"https://zhuanlan.zhihu.com/p/463019160",[114],"nofollow",[116],{"type":23,"value":117},"训练一个模型",{"type":23,"value":119},"，使模型对于和训练数据同一类的数据，可以正确地分类或者输出一个数值；“联邦（federated）”，一般指两个以上的个体组成的一个群体。结合起来，就是：很多个体（机器）一起进行的机器学习。为啥要很多人一起来训练一个模型呢？自己训自己的不就行了吗？",{"type":17,"tag":25,"props":121,"children":122},{},[123],{"type":17,"tag":29,"props":124,"children":126},{"alt":7,"src":125},"https://pic2.zhimg.com/80/v2-ed168162dc464286bc30c1240a421a89_720w.webp",[],{"type":17,"tag":25,"props":128,"children":129},{},[130],{"type":23,"value":131},"网络图片，侵删",{"type":17,"tag":25,"props":133,"children":134},{},[135],{"type":23,"value":136},"对于某些场景来说还真不行。你比如，大家平时聊天都有发一句话跟一个表情的习惯，现在某输入法想给手机用户训练一个表情包推荐模型，也就是你打一句话，系统就给你推荐一个或多个相关的表情包。对于每个人来说，他/她平时使用的表达方式在一段时间内一般是不变的。也就是说，如果每个人的手机基于自己的历史数据去训练一个表情包推荐模型，那么这个模型只能对你经常说的话给出推荐，当你说一些“新语言”的时候，表情包推荐模型可能失效了，那么肿么办呢？你或许会说，那就把很多人的数据放一起训练啊！那我再问一句：你愿意把你的聊天记录发给别人看吗？",{"type":17,"tag":25,"props":138,"children":139},{},[140],{"type":17,"tag":29,"props":141,"children":143},{"alt":7,"src":142},"https://pic2.zhimg.com/80/v2-32ae2fc884c0b9ba057e5e1dd52ffb1d_720w.webp",[],{"type":17,"tag":25,"props":145,"children":146},{},[147],{"type":23,"value":131},{"type":17,"tag":25,"props":149,"children":150},{},[151,153,160],{"type":23,"value":152},"于是联邦学习就出现了：每个参与者拥有自己的数据，并且不希望把自己的数据泄露出去，但他们又想共同训练一个好用的模型。最初的联邦学习算法",{"type":17,"tag":110,"props":154,"children":157},{"href":155,"rel":156},"https://zhuanlan.zhihu.com/p/472650896#ref_1",[114],[158],{"type":23,"value":159},"[1]",{"type":23,"value":161},"如下：",{"type":17,"tag":25,"props":163,"children":164},{},[165],{"type":17,"tag":29,"props":166,"children":168},{"alt":7,"src":167},"https://pic2.zhimg.com/80/v2-09a9d8fe9888cd5d0c2b4f1c02d95f5d_720w.webp",[],{"type":17,"tag":25,"props":170,"children":171},{},[172],{"type":23,"value":173},"图１　
联邦平均聚合算法[1]",{"type":17,"tag":25,"props":175,"children":176},{},[177,179,186],{"type":23,"value":178},"如上图所示，在联邦学习开始前，我们要明确联邦学习过程中的参与方：中心服务器Server和客户端Client。在上面的例子里，一般输入法部署的云服务器就是Server，每个用户的手机就是Client。如图２所示，Server拥有表情包推荐模型（初始模型权重",{"type":17,"tag":110,"props":180,"children":183},{"href":181,"rel":182},"https://zhuanlan.zhihu.com/p/472650896#ref_2",[114],[184],{"type":23,"value":185},"[2]",{"type":23,"value":187},"为 w_0 ），每个Client拥有自己的本地数据和模型（模型结构与Server的模型结构一致，权重是随机的）。",{"type":17,"tag":25,"props":189,"children":190},{},[191],{"type":17,"tag":29,"props":192,"children":194},{"alt":7,"src":193},"https://pic4.zhimg.com/80/v2-f1f00f993dcfacd6313a5b0bec944b27_720w.webp",[],{"type":17,"tag":25,"props":196,"children":197},{},[198],{"type":23,"value":199},"图２　联邦学习参与方组成",{"type":17,"tag":25,"props":201,"children":202},{},[203,208,210],{"type":17,"tag":37,"props":204,"children":205},{},[206],{"type":23,"value":207},"接下来开始",{"type":23,"value":209}," t ",{"type":17,"tag":37,"props":211,"children":212},{},[213],{"type":23,"value":214},"轮训练，每一轮：Step １，Server随机从 K 个客户端中选出一个子集 S_t ，并且把它在这一轮的模型权重 w_t （第一轮的模型权重是 w_0 ）发送给选中的Client：",{"type":17,"tag":25,"props":216,"children":217},{},[218],{"type":17,"tag":37,"props":219,"children":220},{},[221],{"type":17,"tag":29,"props":222,"children":224},{"alt":7,"src":223},"https://pic4.zhimg.com/80/v2-976a34563b4606c6d5045447baab609b_720w.webp",[],{"type":17,"tag":25,"props":226,"children":227},{},[228],{"type":17,"tag":37,"props":229,"children":230},{},[231],{"type":23,"value":232},"图３　Server下发模型",{"type":17,"tag":25,"props":234,"children":235},{},[236],{"type":17,"tag":37,"props":237,"children":238},{},[239],{"type":23,"value":240},"Step ２，每个被选中的Client把Server下发的模型参数更新到自己本地的模型上，并利用本地数据进行训练（也就是图1的ClientUpdate部分）。训练得到新的模型权重 w_{t+1}^{k} ，并且把 w_{t+1}^{k} 
发送给Server：",{"type":17,"tag":25,"props":242,"children":243},{},[244],{"type":17,"tag":37,"props":245,"children":246},{},[247],{"type":17,"tag":29,"props":248,"children":250},{"alt":7,"src":249},"https://pic3.zhimg.com/80/v2-a4c415ff5843e26195d304324aa0611e_720w.webp",[],{"type":17,"tag":25,"props":252,"children":253},{},[254],{"type":17,"tag":37,"props":255,"children":256},{},[257],{"type":23,"value":258},"图４　Client回传训练好的模型权重给Server",{"type":17,"tag":25,"props":260,"children":261},{},[262],{"type":17,"tag":37,"props":263,"children":264},{},[265,267,274],{"type":23,"value":266},"Step ３，Server接收到每个Client发送的权重后，根据下面的公式进行聚合（图１的求和公式下标 t 应该改为 k ）： w_{t+1} = ∑_{k=1}^{K} (n_k / n) · w_{t+1}^{k} 其中 n_k 是每个客户端训练数据的数量， n = ∑_{k=1}^{K} n_k 。需要注意的是，对于 k ∉ S_t ， w_{t+1}^{k} = w_t ；而且在很多场景中，一般是这样计算 w_{t+1} 的（这个问题我在这里做过解释　",{"type":17,"tag":110,"props":268,"children":271},{"href":269,"rel":270},"https://www.zhihu.com/question/516673979/answer/2349972327",[114],[272],{"type":23,"value":273},"联邦学习的经典算法FedAvg中，到底是选择所有客户端模型进行聚合，还是仅聚合被选中的客户端模型？",{"type":23,"value":275},"）： w_{t+1} = ∑_{k∈S_t} (n_k / n) · w_{t+1}^{k} （ n = ∑_{k=1}^{C·K} n_k ） 接下来，Server会把聚合好的模型 w_{t+1} 下发给所有Client（其实就相当于Step 1的行为）：",{"type":17,"tag":25,"props":277,"children":278},{},[279],{"type":17,"tag":37,"props":280,"children":281},{},[282],{"type":17,"tag":29,"props":283,"children":285},{"alt":7,"src":284},"https://pic1.zhimg.com/80/v2-44ba517de154d9453d428af1ea34ae4c_720w.webp",[],{"type":17,"tag":25,"props":287,"children":288},{},[289],{"type":17,"tag":37,"props":290,"children":291},{},[292],{"type":23,"value":293},"图５ 
Server下发聚合模型",{"type":17,"tag":25,"props":295,"children":296},{},[297],{"type":17,"tag":37,"props":298,"children":299},{},[300],{"type":23,"value":301},"一般经过多轮迭代之后，聚合模型就能收敛了，于是所有Client在不泄露本地数据的情况下都获得了一个有效的模型。",{"type":17,"tag":25,"props":303,"children":304},{},[305],{"type":17,"tag":37,"props":306,"children":307},{},[308],{"type":17,"tag":37,"props":309,"children":310},{},[311],{"type":23,"value":312},"以上就是联邦学习的基本场景——横向联邦了，除了横向联邦，还有纵向联邦。如果你感兴趣的话，记得点赞转发加关注哦～",{"type":17,"tag":25,"props":314,"children":315},{},[316],{"type":17,"tag":37,"props":317,"children":318},{},[319],{"type":23,"value":320},"敬请期待下一篇～",{"type":17,"tag":322,"props":323,"children":325},"h2",{"id":324},"参考",[326],{"type":17,"tag":37,"props":327,"children":328},{},[329],{"type":23,"value":324},{"type":17,"tag":331,"props":332,"children":333},"ol",{},[334,349],{"type":17,"tag":335,"props":336,"children":337},"li",{},[338],{"type":17,"tag":37,"props":339,"children":340},{},[341,343],{"type":23,"value":342},"1.Federated Learning of Deep Networks using Model Averaging 
",{"type":17,"tag":110,"props":344,"children":347},{"href":345,"rel":346},"https://arxiv.org/pdf/1602.05629v1.pdf",[114],[348],{"type":23,"value":345},{"type":17,"tag":335,"props":350,"children":351},{},[352],{"type":17,"tag":37,"props":353,"children":354},{},[355],{"type":23,"value":356},"2.模型权重指的是模型参数的值",{"type":17,"tag":25,"props":358,"children":359},{},[360],{"type":17,"tag":37,"props":361,"children":362},{},[363],{"type":17,"tag":29,"props":364,"children":366},{"alt":7,"src":365},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2022/10/24/f5482b1c286f4581b7f3deb4b69c8d1a.jpg",[],{"type":17,"tag":25,"props":368,"children":369},{},[370],{"type":17,"tag":37,"props":371,"children":372},{},[373],{"type":17,"tag":37,"props":374,"children":375},{},[376],{"type":23,"value":377},"MindSpore官方资料",{"type":17,"tag":25,"props":379,"children":380},{},[381],{"type":17,"tag":37,"props":382,"children":383},{},[384,389],{"type":17,"tag":37,"props":385,"children":386},{},[387],{"type":23,"value":388},"官方QQ群",{"type":23,"value":390}," : 486831414",{"type":17,"tag":25,"props":392,"children":393},{},[394],{"type":17,"tag":37,"props":395,"children":396},{},[397,402,403],{"type":17,"tag":37,"props":398,"children":399},{},[400],{"type":23,"value":401},"官网",{"type":23,"value":43},{"type":17,"tag":110,"props":404,"children":407},{"href":405,"rel":406},"https://www.mindspore.cn/",[114],[408],{"type":23,"value":405},{"type":17,"tag":25,"props":410,"children":411},{},[412],{"type":17,"tag":37,"props":413,"children":414},{},[415,420],{"type":17,"tag":37,"props":416,"children":417},{},[418],{"type":23,"value":419},"Gitee",{"type":23,"value":421}," : https://gitee.com/mindspore/mindspore",{"type":17,"tag":25,"props":423,"children":424},{},[425],{"type":17,"tag":37,"props":426,"children":427},{},[428,433,435],{"type":17,"tag":37,"props":429,"children":430},{},[431],{"type":23,"value":432},"GitHub",{"type":23,"value":434}," : 
",{"type":17,"tag":110,"props":436,"children":439},{"href":437,"rel":438},"https://github.com/mindspore-ai/mindspore",[114],[440],{"type":23,"value":437},{"type":17,"tag":25,"props":442,"children":443},{},[444],{"type":17,"tag":37,"props":445,"children":446},{},[447,452,453],{"type":17,"tag":37,"props":448,"children":449},{},[450],{"type":23,"value":451},"论坛",{"type":23,"value":43},{"type":17,"tag":110,"props":454,"children":457},{"href":455,"rel":456},"https://bbs.huaweicloud.com/forum/forum-1076-1.html",[114],[458],{"type":23,"value":455},{"title":7,"searchDepth":460,"depth":460,"links":461},4,[462],{"id":324,"depth":463,"text":324},2,"markdown","content:technology-blogs:zh:1903.md","content","technology-blogs/zh/1903.md","technology-blogs/zh/1903","md",1776506116848]