[{"data":1,"prerenderedAt":473},["ShallowReactive",2],{"content-query-eDbFkI0tZE":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":467,"_id":468,"_source":469,"_file":470,"_stem":471,"_extension":472},"/technology-blogs/zh/1794","zh",false,"","【MindSpore易点通】网络实战之交叉熵类Loss函数","机器学习和深度学习中常用到的几种loss函数","2022-09-09","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2022/09/09/92adaab622ab4f3f871e66dbac63688e.png","technology-blogs","基础知识",{"type":15,"children":16,"toc":464},"root",[17,25,41,47,62,67,76,81,86,94,99,104,109,117,122,130,135,143,148,153,161,169,174,182,187,192,200,205,213,218,226,231,259,264,276,284,289,297,309,317,322,327,332,337,345,350,355,382,394,402,414,419,427,432,440,445,459],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"mindspore易点通网络实战之交叉熵类loss函数",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":29},"h6",{"id":28},"简介",[30],{"type":18,"tag":31,"props":32,"children":33},"strong",{},[34],{"type":18,"tag":31,"props":35,"children":36},{},[37],{"type":18,"tag":31,"props":38,"children":39},{},[40],{"type":24,"value":28},{"type":18,"tag":42,"props":43,"children":44},"p",{},[45],{"type":24,"value":46},"本篇内容和大家一起学习下机器学习和深度学习中常用到的几种loss函数，根据计算分类方式以及场景的不同，我分为了以下三部分进行分析。",{"type":18,"tag":26,"props":48,"children":50},{"id":49},"crossentropy-loss",[51],{"type":18,"tag":31,"props":52,"children":53},{},[54],{"type":18,"tag":31,"props":55,"children":56},{},[57],{"type":18,"tag":31,"props":58,"children":59},{},[60],{"type":24,"value":61},"CrossEntropy Loss",{"type":18,"tag":42,"props":63,"children":64},{},[65],{"type":24,"value":66},"交叉熵函数是在分类模型中常用的一种损失函数，其表达式为：",{"type":18,"tag":42,"props":68,"children":69},{},[70],{"type":18,"tag":71,"props":72,"children":75},"img",{"alt":73,"src":74},"cke_6740.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063720.90654361656682485361324286704373:50530908090512:2400:AAE26D8D5E5CC48975C0B1F01C481594ACB1C9E92809DAA68149BC9B86F9DDE0.png",[],{"type":18,"tag":42,"props":77,"children":78},{},[79],{"type":24,"value":80},"其中用到了信息熵的概念，信息量是一个事件发生所带来的信息，而信息熵则是在结果出来之前对可能产生的信息量的期望，考虑该随机变量的所有可能取值，即所有可能发生事件所带来的信息量的期望。",{"type":18,"tag":42,"props":82,"children":83},{},[84],{"type":24,"value":85},"因此我们可以得到信息熵的计算表达式为：",{"type":18,"tag":42,"props":87,"children":88},{},[89],{"type":18,"tag":71,"props":90,"children":93},{"alt":91,"src":92},"cke_16606.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063727.41836203839795006164822035920043:50530908090512:2400:5137479148D47C3DA96A7CF7327DF585E90BAEAD5C01BAFA0723097F8823BFBD.png",[],{"type":18,"tag":42,"props":95,"children":96},{},[97],{"type":24,"value":98},"其中P(xi)表示为在时间点x的发生概率，信息熵是用来衡量事物不确定性的。信息熵越大，事物越具不确定性，事物越复杂。",{"type":18,"tag":42,"props":100,"children":101},{},[102],{"type":24,"value":103},"可以理解为对于同一个随机变量x，有两个概率分布，判断这两个概率分布的差异。假设两个概率分布对应为p(x),q(x), 如何表示这两个分布的差异，我们可以使用信息熵判断，于是相对熵产生。",{"type":18,"tag":42,"props":105,"children":106},{},[107],{"type":24,"value":108},"p(x)分布的信息熵为：",{"type":18,"tag":42,"props":110,"children":111},{},[112],{"type":18,"tag":71,"props":113,"children":116},{"alt":114,"src":115},"cke_25440.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063737.03011735520889635241141115098619:50530908090512:2400:A7A925B4CA711687590F2449A56A465DAA93810AA0046DA0C7A8D8B80DF4F8A2.png",[],{"type":18,"tag":42,"props":118,"children":119},{},[120],{"type":24,"value":121},"q(x)分布的信息熵为：",{"type":18,"tag":42,"props":123,"children":124},{},[125],{"type":18,"tag":71,"props":126,"children":129},{"alt":127,"src":128},"cke_33767.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063743.52263122389202279793837175851080:50530908090512:2400:25526619A8C4D29DEE053AA19DB4E5182B571D0638AFF1CDB38256A44332215A.png",[],{"type":18,"tag":42,"props":131,"children":132},{},[133],{"type":24,"value":134},"相对熵为：",{"type":18,"tag":42,"props":136,"children":137},{},[138],{"type":18,"tag":71,"props":139,"children":142},{"alt":140,"src":141},"cke_43799.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063750.58818591242307220818041332889008:50530908090512:2400:DA532E9CF49292F8D670D7EA19B316DABD5D10AC31E42F0A6349FFAB64D566DC.png",[],{"type":18,"tag":42,"props":144,"children":145},{},[146],{"type":24,"value":147},"p(x)为样本真实分布，q(x)为预测分布",{"type":18,"tag":42,"props":149,"children":150},{},[151],{"type":24,"value":152},"于是得到相对熵公式为：",{"type":18,"tag":42,"props":154,"children":155},{},[156],{"type":18,"tag":71,"props":157,"children":160},{"alt":158,"src":159},"cke_53897.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063801.68587678140951987707746577997735:50530908090512:2400:3510F849E8E3F91C159F88552A74D3B5A37845ADECC7F7D4DA5039F0C9C1597D.png",[],{"type":18,"tag":42,"props":162,"children":163},{},[164],{"type":18,"tag":71,"props":165,"children":168},{"alt":166,"src":167},"cke_66340.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063816.86205494234655488107266472448412:50530908090512:2400:F01E4D5F8A49074C13C49A4ABEE2D9C568580F761268B52ABCF78305F2726947.png",[],{"type":18,"tag":42,"props":170,"children":171},{},[172],{"type":24,"value":173},"交叉熵的函数表示为：",{"type":18,"tag":42,"props":175,"children":176},{},[177],{"type":18,"tag":71,"props":178,"children":181},{"alt":179,"src":180},"cke_77151.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063825.99390022611841972521233218459693:50530908090512:2400:063CE070E7834690D7D73629B21FD6DB1F8C8C82D966291F98110705453CFAE5.png",[],{"type":18,"tag":42,"props":183,"children":184},{},[185],{"type":24,"value":186},"我们观察可以看出，这里与相对熵较为相似，由于我们进行模型训练，有监督训练，样本标签已经确定，相当于真实的概率的分布P(x)已经得知，因此这边的为固定值，相当于常量，那么可以继续优化表达式。",{"type":18,"tag":42,"props":188,"children":189},{},[190],{"type":24,"value":191},"在我们模型训练中完整的相对熵表达式为：",{"type":18,"tag":42,"props":193,"children":194},{},[195],{"type":18,"tag":71,"props":196,"children":199},{"alt":197,"src":198},"cke_87454.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063836.54510165178314379618613434206308:50530908090512:2400:4FC96ECB7303615BF4F8239F894514973359F5EA02082749C2050B1CDEFA9D4B.png",[],{"type":18,"tag":42,"props":201,"children":202},{},[203],{"type":24,"value":204},"对于其做为损失函数，常量可以忽略，因此得到了交叉熵的表现形式。",{"type":18,"tag":42,"props":206,"children":207},{},[208],{"type":18,"tag":71,"props":209,"children":212},{"alt":210,"src":211},"cke_98375.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063843.66231386239902514579726963497365:50530908090512:2400:07CF1207CA73CA744177BED433DE7D387AC685A1DB76353705B581B4212D7010.png",[],{"type":18,"tag":42,"props":214,"children":215},{},[216],{"type":24,"value":217},"对于在二分类损失函数中应用，交叉熵损失函数为以下形式。",{"type":18,"tag":42,"props":219,"children":220},{},[221],{"type":18,"tag":71,"props":222,"children":225},{"alt":223,"src":224},"cke_108784.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063848.04898342691594365064649981822981:50530908090512:2400:D150DE3BE24513D653D6ED5BDCB9744E475F2506B018F91E121A7CFE5BAEE437.png",[],{"type":18,"tag":42,"props":227,"children":228},{},[229],{"type":24,"value":230},"了解完交叉熵的基本计算原理，下面关联下另一种以交叉熵为基础的loss函数：BCELoss、BCEWithLogitsLoss和softmax_cross_entropy_with_logits。",{"type":18,"tag":26,"props":232,"children":234},{"id":233},"bceloss和softmarginloss",[235,248],{"type":18,"tag":31,"props":236,"children":237},{},[238,246],{"type":18,"tag":31,"props":239,"children":240},{},[241],{"type":18,"tag":31,"props":242,"children":243},{},[244],{"type":24,"value":245},"BCELoss",{"type":24,"value":247},"****和",{"type":18,"tag":31,"props":249,"children":250},{},[251],{"type":18,"tag":31,"props":252,"children":253},{},[254],{"type":18,"tag":31,"props":255,"children":256},{},[257],{"type":24,"value":258},"SoftMarginLoss",{"type":18,"tag":42,"props":260,"children":261},{},[262],{"type":24,"value":263},"这两种函数都是基于交叉熵的二分类loss函数，所以放在一起分析。",{"type":18,"tag":42,"props":265,"children":266},{},[267,274],{"type":18,"tag":31,"props":268,"children":269},{},[270],{"type":18,"tag":31,"props":271,"children":272},{},[273],{"type":24,"value":245},{"type":24,"value":275},"中文名称是二分类交叉熵损失，它是用于做二分类模型的损失函数，因为是二分类，可以用0、1表示两个类别。如果想用于多分类的模型，可以将类别拆分成两两一组进行使用。先来看下BCELoss的表达式。",{"type":18,"tag":42,"props":277,"children":278},{},[279],{"type":18,"tag":71,"props":280,"children":283},{"alt":281,"src":282},"cke_119250.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063903.03154315645493979368727701371550:50530908090512:2400:9C425E1C47F8C18611891DC3E6467887ECC3D8876E06DBEBC3AF268E826A84FC.png",[],{"type":18,"tag":42,"props":285,"children":286},{},[287],{"type":24,"value":288},"式子中的pt表示模型的预测值；target表示真实值,；w是权重值,一般是1。因为用0、1表示两个类别，所以在预测值和真实值相同时，其中一项将会为0，上面这个表达式是计算的单个样本。当一个batch的N个样本时，还需要累加再取平均数。",{"type":18,"tag":42,"props":290,"children":291},{},[292],{"type":18,"tag":71,"props":293,"children":296},{"alt":294,"src":295},"cke_131517.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063917.91510536377618405253701342586904:50530908090512:2400:9B33DF2DFBF7786BA5F5B9C9AA7F148B079F353A03139FE1F6BBC259D6FFD494.png",[],{"type":18,"tag":42,"props":298,"children":299},{},[300,307],{"type":18,"tag":31,"props":301,"children":302},{},[303],{"type":18,"tag":31,"props":304,"children":305},{},[306],{"type":24,"value":258},{"type":24,"value":308},"对于包含N个样本的batch数据D(x,y), x代表模型输出， y代表真实的类 ，表达式如下：",{"type":18,"tag":42,"props":310,"children":311},{},[312],{"type":18,"tag":71,"props":313,"children":316},{"alt":314,"src":315},"cke_142069.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063925.95263579714538387723458742387870:50530908090512:2400:721E07098662743E544B2E7BFE0997FA3B08633C8B36CE494C4F4A61DE8E8EE8.png",[],{"type":18,"tag":42,"props":318,"children":319},{},[320],{"type":24,"value":321},"式子中的x.nelement( )代表x中元素的个数N",{"type":18,"tag":42,"props":323,"children":324},{},[325],{"type":24,"value":326},"如果单个样本对应一个二分类，则x.nelement( )=N",{"type":18,"tag":42,"props":328,"children":329},{},[330],{"type":24,"value":331},"如果单个样本对应M个二分类，则x.nelement( )=M∗N",{"type":18,"tag":42,"props":333,"children":334},{},[335],{"type":24,"value":336},"我们通过累加前的单个加数来分析",{"type":18,"tag":42,"props":338,"children":339},{},[340],{"type":18,"tag":71,"props":341,"children":344},{"alt":342,"src":343},"cke_152671.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063935.93884448933032871563004052418169:50530908090512:2400:071F458A55597D61C94A301A9988CB328ED9534848C3B7AAEC48E7570EB4C142.png",[],{"type":18,"tag":42,"props":346,"children":347},{},[348],{"type":24,"value":349},"·当x[i]与y[i]同号，即预测正确时，x[i]与y[i]乘积越大，那么loss会越小，分类确信度就会越高；",{"type":18,"tag":42,"props":351,"children":352},{},[353],{"type":24,"value":354},"·同理，当x[i]与y[i]异号，即预测错误时，loss越大。",{"type":18,"tag":26,"props":356,"children":358},{"id":357},"bcewithlogitsloss和softmax_cross_entropy_with_logits",[359,371],{"type":18,"tag":31,"props":360,"children":361},{},[362,370],{"type":18,"tag":31,"props":363,"children":364},{},[365],{"type":18,"tag":31,"props":366,"children":367},{},[368],{"type":24,"value":369},"BCEWithLogitsLoss",{"type":24,"value":247},{"type":18,"tag":31,"props":372,"children":373},{},[374],{"type":18,"tag":31,"props":375,"children":376},{},[377],{"type":18,"tag":31,"props":378,"children":379},{},[380],{"type":24,"value":381},"softmax_cross_entropy_with_logits",{"type":18,"tag":42,"props":383,"children":384},{},[385,392],{"type":18,"tag":31,"props":386,"children":387},{},[388],{"type":18,"tag":31,"props":389,"children":390},{},[391],{"type":24,"value":369},{"type":24,"value":393},"将sigmoid操作和与BCELoss组合到了一起使用。计算过程和原理是与BCELoss类似的，在BCELoss的计算表达计算式的基础中增加一个sigmoid计算，表达式如下。",{"type":18,"tag":42,"props":395,"children":396},{},[397],{"type":18,"tag":71,"props":398,"children":401},{"alt":399,"src":400},"cke_161582.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063948.61582635426211218056244988609698:50530908090512:2400:7A182D3BC056F8E36CBA651E5AD0B9113E2927522077099581DD21C7981E9B97.png",[],{"type":18,"tag":42,"props":403,"children":404},{},[405,412],{"type":18,"tag":31,"props":406,"children":407},{},[408],{"type":18,"tag":31,"props":409,"children":410},{},[411],{"type":24,"value":381},{"type":24,"value":413},"是在交叉熵前做一次softmax计算。具体的执行流程大概分为两个部分：",{"type":18,"tag":42,"props":415,"children":416},{},[417],{"type":24,"value":418},"第一部分是对网络模型最后一层的输出做一个softmax，softmax的用处通常是求取输出属于某一类的概率，对于单样本而言，输出就是一个num_classes 大小的向量（[Y1，Y2，Y3，...]其中Y1，Y2，Y3，...分别代表了是属于该类的概率）。softmax的计算表达式如下：",{"type":18,"tag":42,"props":420,"children":421},{},[422],{"type":18,"tag":71,"props":423,"children":426},{"alt":424,"src":425},"cke_170533.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909063959.01816765809451607434148239104935:50530908090512:2400:824A186820183CFF60B3FF0170254D82EF2D25B73FCF9899723B5894F5FAC2AC.png",[],{"type":18,"tag":42,"props":428,"children":429},{},[430],{"type":24,"value":431},"第二部分是将softmax的输出向量[Y1，Y2，Y3，...]和样本的实际标签做一个交叉熵计算",{"type":18,"tag":42,"props":433,"children":434},{},[435],{"type":18,"tag":71,"props":436,"children":439},{"alt":437,"src":438},"cke_179526.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/38d/1f8/08e359bae900f38d1f8fc0153abba3cc.20220909064007.68716636847725442766030558643394:50530908090512:2400:B5841C8790B68ED3C077D8A3F83A34AF8517CDA6EC9C3D1DFA6CC3FF54D0BA15.png",[],{"type":18,"tag":42,"props":441,"children":442},{},[443],{"type":24,"value":444},"y`i指实际标签中第i个的值；yi指softmax的输出向量[Y1，Y2,Y3...]中，第i个元素的值。从而可以计算出loss值。",{"type":18,"tag":26,"props":446,"children":448},{"id":447},"总结",[449],{"type":18,"tag":31,"props":450,"children":451},{},[452],{"type":18,"tag":31,"props":453,"children":454},{},[455],{"type":18,"tag":31,"props":456,"children":457},{},[458],{"type":24,"value":447},{"type":18,"tag":42,"props":460,"children":461},{},[462],{"type":24,"value":463},"本篇首先对交叉熵原理做了解析，再基于交叉熵的基础，引出了第二部分BCELoss和SoftMarginLoss的二分类loss函数，以及第三部分可用于多分类场景的BCEWithLogitsLoss和softmax_cross_entropy_with_logits损失函数。",{"title":7,"searchDepth":465,"depth":465,"links":466},4,[],"markdown","content:technology-blogs:zh:1794.md","content","technology-blogs/zh/1794.md","technology-blogs/zh/1794","md",1776506115825]