[{"data":1,"prerenderedAt":383},["ShallowReactive",2],{"content-query-2aYyEtYd6g":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":377,"_id":378,"_source":379,"_file":380,"_stem":381,"_extension":382},"/technology-blogs/zh/3593","zh",false,"","Bart模型论文解读，并基于MindSpore NLP推理复现","作者：鹤止         来源：知乎","2025-01-24","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/07/5309daee1ed14bf1aa71630545cddc67.png","technology-blogs","开发者分享",{"type":15,"children":16,"toc":365},"root",[17,25,42,47,55,59,64,69,89,93,98,106,111,116,129,137,141,149,157,161,166,174,187,200,204,212,220,224,229,234,239,246,254,258,265,273,278,282,289,297,302,310,318,323,331,336,341,348,353,360],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"bart模型论文解读并基于mindspore-nlp推理复现",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":28},"p",{},[29,35,37],{"type":18,"tag":30,"props":31,"children":32},"strong",{},[33],{"type":24,"value":34},"作者：鹤止",{"type":24,"value":36}," ",{"type":18,"tag":30,"props":38,"children":39},{},[40],{"type":24,"value":41},"来源：知乎",{"type":18,"tag":26,"props":43,"children":44},{},[45],{"type":24,"value":46},"随着自然语言处理（NLP）领域的快速发展，预训练模型成为推动各类任务性能提升的核心驱动力之一。从BERT到GPT，各类模型在理解与生成任务中推动了性能的提升。然而，它们的设计往往局限于特定任务场景，难以在更广泛的任务中兼顾效率与性能。Facebook AI团队提出的BART（Bidirectional and Auto-Regressive Transformers）以其统一的序列到序列框架和创新性的去噪预训练策略，为解决这一问题提供了新的思路。",{"type":18,"tag":26,"props":48,"children":49},{},[50],{"type":18,"tag":30,"props":51,"children":52},{},[53],{"type":24,"value":54},"论文创新点",{"type":18,"tag":56,"props":57,"children":58},"h2",{"id":7},[],{"type":18,"tag":26,"props":60,"children":61},{},[62],{"type":24,"value":63},"本文的创新点如下：",{"type":18,"tag":26,"props":65,"children":66},{},[67],{"type":24,"value":68},"**1、通用性强的去噪策略：**BART提出了各种灵活的文本扰动方法，使其能够更高效地学习语言的全局语义和结构特性。",{"type":18,"tag":70,"props":71,"children":72},"ul",{},[73,79,84],{"type":18,"tag":74,"props":75,"children":76},"li",{},[77],{"type":24,"value":78},"文本填充（Text Infilling）：随机选取文本片段并使用mask替换，同时学习缺失内容和缺失长度。",{"type":18,"tag":74,"props":80,"children":81},{},[82],{"type":24,"value":83},"句子随机排列（Sentence Shuffing）：打乱句子顺序，增强模型理解跨句依赖的能力。",{"type":18,"tag":74,"props":85,"children":86},{},[87],{"type":24,"value":88},"文本旋转（Document Rotation）：随机调整文档的起始位置，训练模型理解文档的整体结构。",{"type":18,"tag":56,"props":90,"children":92},{"id":91},"_1",[],{"type":18,"tag":26,"props":94,"children":95},{},[96],{"type":24,"value":97},"单词掩码与删除（Token Masking &Deletion）：丰富传统掩码策略，模型不仅需要预测丢失内容，还需要推断丢失位置。",{"type":18,"tag":26,"props":99,"children":100},{},[101],{"type":18,"tag":102,"props":103,"children":105},"img",{"alt":7,"src":104},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/07/bbcc387708834910ad0109fdb7195fed.png",[],{"type":18,"tag":26,"props":107,"children":108},{},[109],{"type":24,"value":110},"灵活地去噪策略",{"type":18,"tag":26,"props":112,"children":113},{},[114],{"type":24,"value":115},"**2、统一的序列到序列框架：**BART融合了BERT的双向编码器和GPT的自回归解码器。",{"type":18,"tag":70,"props":117,"children":118},{},[119,124],{"type":18,"tag":74,"props":120,"children":121},{},[122],{"type":24,"value":123},"编码器（encoder）：捕捉输入的全局上下文信息。",{"type":18,"tag":74,"props":125,"children":126},{},[127],{"type":24,"value":128},"解码器（decoder）：生成自回归输出，适用于生成任务。",{"type":18,"tag":26,"props":130,"children":131},{},[132],{"type":18,"tag":30,"props":133,"children":134},{},[135],{"type":24,"value":136},"数据集上的指标评价得分",{"type":18,"tag":56,"props":138,"children":140},{"id":139},"_2",[],{"type":18,"tag":26,"props":142,"children":143},{},[144],{"type":18,"tag":30,"props":145,"children":146},{},[147],{"type":24,"value":148},"01",{"type":18,"tag":26,"props":150,"children":151},{},[152],{"type":18,"tag":30,"props":153,"children":154},{},[155],{"type":24,"value":156},"生成任务",{"type":18,"tag":56,"props":158,"children":160},{"id":159},"_3",[],{"type":18,"tag":26,"props":162,"children":163},{},[164],{"type":24,"value":165},"BART在生成任务上展现了极强的能力，特别是文本摘要和对话生成方面。",{"type":18,"tag":26,"props":167,"children":168},{},[169],{"type":18,"tag":30,"props":170,"children":171},{},[172],{"type":24,"value":173},"文本摘要：",{"type":18,"tag":70,"props":175,"children":176},{},[177,182],{"type":18,"tag":74,"props":178,"children":179},{},[180],{"type":24,"value":181},"CNN/DailyMail数据集：ROUGE-1、ROUGE-2和ROUGE-L分别为44.16、21.28和40.90。",{"type":18,"tag":74,"props":183,"children":184},{},[185],{"type":24,"value":186},"XSum数据集（更抽象的摘要任务）：相比最优基线提升6分，ROUGE-1、ROUGE-2和ROUGE-L分别达到45.14、22.27和37.25。",{"type":18,"tag":26,"props":188,"children":189},{},[190,194,196],{"type":18,"tag":102,"props":191,"children":193},{"alt":7,"src":192},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/07/b93468a34d324f6eab383b80b2848255.png",[],{"type":24,"value":195},"\n**对话生成：**在ConvAI2任务中，BART的F1得分为20.72，Perplexity为11.85，显著优于之前的最佳系统。\n",{"type":18,"tag":102,"props":197,"children":199},{"alt":7,"src":198},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/07/f9af86e12fd748df95c0e67e8f539960.png",[],{"type":18,"tag":56,"props":201,"children":203},{"id":202},"_4",[],{"type":18,"tag":26,"props":205,"children":206},{},[207],{"type":18,"tag":30,"props":208,"children":209},{},[210],{"type":24,"value":211},"02",{"type":18,"tag":26,"props":213,"children":214},{},[215],{"type":18,"tag":30,"props":216,"children":217},{},[218],{"type":24,"value":219},"理解任务",{"type":18,"tag":56,"props":221,"children":223},{"id":222},"_5",[],{"type":18,"tag":26,"props":225,"children":226},{},[227],{"type":24,"value":228},"尽管BART的设计偏向于生成任务，但在理解任务上表现同样出色。",{"type":18,"tag":26,"props":230,"children":231},{},[232],{"type":24,"value":233},"GLUE基准测试：在MNLI任务中，BART达到89.9/90.1（m/mm）的准确率，与RoBERTa表现相当。在SST、QQP、QNLI任务当中表现出色。",{"type":18,"tag":26,"props":235,"children":236},{},[237],{"type":24,"value":238},"问答任务：在SQuAD 1.1上取得88.8/94.6（EM/F1）的成绩。",{"type":18,"tag":26,"props":240,"children":241},{},[242],{"type":18,"tag":102,"props":243,"children":245},{"alt":7,"src":244},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/07/48eabc28da8a463788ded1a110c3756e.png",[],{"type":18,"tag":26,"props":247,"children":248},{},[249],{"type":18,"tag":30,"props":250,"children":251},{},[252],{"type":24,"value":253},"相比其他工作的优势",{"type":18,"tag":56,"props":255,"children":257},{"id":256},"_6",[],{"type":18,"tag":26,"props":259,"children":260},{},[261],{"type":18,"tag":30,"props":262,"children":263},{},[264],{"type":24,"value":148},{"type":18,"tag":26,"props":266,"children":267},{},[268],{"type":18,"tag":30,"props":269,"children":270},{},[271],{"type":24,"value":272},"更强的生成能力",{"type":18,"tag":26,"props":274,"children":275},{},[276],{"type":24,"value":277},"相比BERT等只适用于理解任务的模型，BART的自回归解码器显著增强了生成任务性能，特别是在高度抽象的文本摘要和对话生成任务上。",{"type":18,"tag":56,"props":279,"children":281},{"id":280},"_7",[],{"type":18,"tag":26,"props":283,"children":284},{},[285],{"type":18,"tag":30,"props":286,"children":287},{},[288],{"type":24,"value":211},{"type":18,"tag":26,"props":290,"children":291},{},[292],{"type":18,"tag":30,"props":293,"children":294},{},[295],{"type":24,"value":296},"灵活的预训练目标",{"type":18,"tag":26,"props":298,"children":299},{},[300],{"type":24,"value":301},"BART的多种文本扰动方式（如文本填充和句子随机排列）使其对输入分布的适应能力更强。",{"type":18,"tag":26,"props":303,"children":304},{},[305],{"type":18,"tag":30,"props":306,"children":307},{},[308],{"type":24,"value":309},"03",{"type":18,"tag":26,"props":311,"children":312},{},[313],{"type":18,"tag":30,"props":314,"children":315},{},[316],{"type":24,"value":317},"减少预训练与下游任务的分布差异",{"type":18,"tag":26,"props":319,"children":320},{},[321],{"type":24,"value":322},"通过去噪自编码的预训练目标，BART 的解码器始终在真实上下文中生成输出，大大降低了预训练与下游任务间的分布不匹配问题。",{"type":18,"tag":26,"props":324,"children":325},{},[326],{"type":18,"tag":30,"props":327,"children":328},{},[329],{"type":24,"value":330},"使用MindNLP对数据集进行推理验证",{"type":18,"tag":26,"props":332,"children":333},{},[334],{"type":24,"value":335},"为了验证BART模型的效果，我们使用了MindNLP来进行推理验证。MindNLP是基于MindSpore的一个自然语言处理（NLP）开源库，它提供了大量的数据集、模型和工具，旨在降低进入NLP领域的门槛，加速研究和开发过程。",{"type":18,"tag":26,"props":337,"children":338},{},[339],{"type":24,"value":340},"我们分别使用了transformer和MindNLP两个框架来加载模型，并使用了SST和NPLI两个数据集来进行评估，结果如下表所示，可以看出transformers和MindNLP之间只有及其微小的差距。MindNLP作为一款新兴的开源工具库，MindNLP展现出了极大的潜力和广阔的发展前景。我相信随着其不断完善和优化，未来它将取得更加出色的表现，并且更简单上手。其完整的代码已上传至github,链接如下：luoluo0042/mindnlp_bart: Inference validation of bart models using mindnlp",{"type":18,"tag":26,"props":342,"children":343},{},[344],{"type":18,"tag":102,"props":345,"children":347},{"alt":7,"src":346},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/02/07/cd42ccd5121f41b29dc8d2a917ee3502.png",[],{"type":18,"tag":26,"props":349,"children":350},{},[351],{"type":24,"value":352},"在两个框架上进行推理验证",{"type":18,"tag":26,"props":354,"children":355},{},[356],{"type":18,"tag":30,"props":357,"children":358},{},[359],{"type":24,"value":330},{"type":18,"tag":26,"props":361,"children":362},{},[363],{"type":24,"value":364},"BART是一种强大的序列到序列预训练模型，通过结合BERT的双向编码器和GPT的自回归解码器，显著提升了文本生成任务的性能，同时在理解任务上表现稳定。MindNLP中包含了NLP中许多常用的方法，可以帮助我们更方便快捷地构建和训练模型，大家也可以尝试使用MindNLP对模型进行训练和推理验证。",{"title":7,"searchDepth":366,"depth":366,"links":367},4,[368,370,371,372,373,374,375,376],{"id":7,"depth":369,"text":7},2,{"id":91,"depth":369,"text":7},{"id":139,"depth":369,"text":7},{"id":159,"depth":369,"text":7},{"id":202,"depth":369,"text":7},{"id":222,"depth":369,"text":7},{"id":256,"depth":369,"text":7},{"id":280,"depth":369,"text":7},"markdown","content:technology-blogs:zh:3593.md","content","technology-blogs/zh/3593.md","technology-blogs/zh/3593","md",1776506131853]