[{"data":1,"prerenderedAt":481},["ShallowReactive",2],{"content-query-syTdGebIba":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":475,"_id":476,"_source":477,"_file":478,"_stem":479,"_extension":480},"/technology-blogs/zh/3706","zh",false,"","MindSpore混合精度训练源码大揭秘","混合精度：AI的“选择性摸鱼”艺术","2025-04-22","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/04/25/51fc1fdf89fe47caacd4ec36df45a175.png","technology-blogs","实践",{"type":15,"children":16,"toc":459},"root",[17,25,42,48,53,68,76,84,89,99,104,109,114,119,127,135,140,148,153,158,163,168,173,181,189,194,202,207,212,217,222,226,231,239,247,252,260,265,270,275,284,293,298,306,311,316,321,336,341,349,354,359,364,379,384,389,394,399,404,409,424,429,434,439,444,449,454],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"mindspore混合精度训练源码大揭秘",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":29},"h3",{"id":28},"_01-混合精度ai的选择性摸鱼艺术",[30,36,38],{"type":18,"tag":31,"props":32,"children":33},"strong",{},[34],{"type":24,"value":35},"# 01",{"type":24,"value":37}," ",{"type":18,"tag":31,"props":39,"children":40},{},[41],{"type":24,"value":9},{"type":18,"tag":43,"props":44,"children":45},"p",{},[46],{"type":24,"value":47},"想象一下，你是一个每天要处理1000份报告的社畜。如果每份报告都用文言文写，你肯定累到原地爆炸。但如果用“缩写体”快速处理关键部分，只在最后总结时用正经格式——效率直接起飞！这就是混合精度（Mixed Precision）的核心思想：让计算在低精度（如float16）下飞驰，在关键环节（如梯度累积）切回高精度（float32）保命。",{"type":18,"tag":43,"props":49,"children":50},{},[51],{"type":24,"value":52},"MindSpore的混合精度模块就像一位精明的“摸鱼导师”，它通过白名单（该摸鱼的地方）和黑名单（必须正经的地方），教会神经网络何时该“偷懒”，何时该严谨。接下来，我们深入源码，看看这位“导师”是如何工作的。",{"type":18,"tag":26,"props":54,"children":56},{"id":55},"_02-源码解剖室四大核心武器",[57,62,63],{"type":18,"tag":31,"props":58,"children":59},{},[60],{"type":24,"value":61},"# 
02",{"type":24,"value":37},{"type":18,"tag":31,"props":64,"children":65},{},[66],{"type":24,"value":67},"源码解剖室：四大核心武器",{"type":18,"tag":43,"props":69,"children":70},{},[71],{"type":18,"tag":31,"props":72,"children":73},{},[74],{"type":24,"value":75},"一",{"type":18,"tag":43,"props":77,"children":78},{},[79],{"type":18,"tag":31,"props":80,"children":81},{},[82],{"type":24,"value":83},"名单管理：谁是VIP，谁是钉子户",{"type":18,"tag":43,"props":85,"children":86},{},[87],{"type":24,"value":88},"代码中定义了两组“生死簿”：",{"type":18,"tag":90,"props":91,"children":93},"pre",{"code":92},"AMP_WHITE_LIST = [nn.Conv2d, P.MatMul, ...]  # 这些层允许用float16\nAMP_BLACK_LIST = [nn.BatchNorm2d, ...]       # 这些层必须用float32\n",[94],{"type":18,"tag":95,"props":96,"children":97},"code",{"__ignoreMap":7},[98],{"type":24,"value":92},{"type":18,"tag":43,"props":100,"children":101},{},[102],{"type":24,"value":103},"白名单（O1模式）：卷积、矩阵乘法等计算密集型操作，低精度提速明显。",{"type":18,"tag":43,"props":105,"children":106},{},[107],{"type":24,"value":108},"黑名单（O2模式）：BatchNorm、LayerNorm等对数值敏感的操作，必须高精度保平安。",{"type":18,"tag":43,"props":110,"children":111},{},[112],{"type":24,"value":113},"幽默解读：",{"type":18,"tag":43,"props":115,"children":116},{},[117],{"type":24,"value":118},"白名单成员就像公司里的“摸鱼之王”——老板睁只眼闭只眼；黑名单则是财务部的会计，错一个小数点就完犊子，必须时刻保持严谨。",{"type":18,"tag":43,"props":120,"children":121},{},[122],{"type":18,"tag":31,"props":123,"children":124},{},[125],{"type":24,"value":126},"二",{"type":18,"tag":43,"props":128,"children":129},{},[130],{"type":18,"tag":31,"props":131,"children":132},{},[133],{"type":24,"value":134},"精度转换的“乾坤大挪移”",{"type":18,"tag":43,"props":136,"children":137},{},[138],{"type":24,"value":139},"关键函数_insert_cast_for_operator负责插入类型转换：",{"type":18,"tag":90,"props":141,"children":143},{"code":142},"\ndef _insert_cast_for_operator(node, dtype):\n    # 输入转低精度\n    incast_node = Node.create_call_function(_amp_cast_op, args=[value, dtype])\n    # 输出转回高精度\n    outcast_node = Node.create_call_function(_amp_cast_op, args=[value, 
\"float32\"])\n",[144],{"type":18,"tag":95,"props":145,"children":146},{"__ignoreMap":7},[147],{"type":24,"value":142},{"type":18,"tag":43,"props":149,"children":150},{},[151],{"type":24,"value":152},"这个过程就像给数据装上“变形金刚”：",{"type":18,"tag":43,"props":154,"children":155},{},[156],{"type":24,"value":157},"1. 输入时：`float32` → `float16`（节省内存、加速计算）",{"type":18,"tag":43,"props":159,"children":160},{},[161],{"type":24,"value":162},"2. 计算后：`float16` → `float32`（避免误差累积）",{"type":18,"tag":43,"props":164,"children":165},{},[166],{"type":24,"value":167},"源码亮点：",{"type":18,"tag":43,"props":169,"children":170},{},[171],{"type":24,"value":172},"_remove_duplicated_cast函数会删除冗余的类型转换，如同老板发现你写了重复的周报，直接怒删！",{"type":18,"tag":43,"props":174,"children":175},{},[176],{"type":18,"tag":31,"props":177,"children":178},{},[179],{"type":24,"value":180},"三",{"type":18,"tag":43,"props":182,"children":183},{},[184],{"type":18,"tag":31,"props":185,"children":186},{},[187],{"type":24,"value":188},"策略引擎：O0到O3的“四档变速”",{"type":18,"tag":43,"props":190,"children":191},{},[192],{"type":24,"value":193},"在auto_mixed_precision函数中，不同模式对应不同策略：",{"type":18,"tag":90,"props":195,"children":197},{"code":196},"def auto_mixed_precision(network, amp_level=\"O0\"):\n    if amp_level == \"O1\":  # 白名单摸鱼\n        network = _auto_mixed_precision_rewrite(network, white_list=AMP_WHITE_LIST)\n    elif amp_level == \"O2\":  # 黑名单正经\n        network = _auto_black_list(network, AMP_BLACK_LIST)\n    elif amp_level == \"O3\":  # 全员摸鱼\n        
network.to_float(float16)\n",[198],{"type":18,"tag":95,"props":199,"children":200},{"__ignoreMap":7},[201],{"type":24,"value":196},{"type":18,"tag":43,"props":203,"children":204},{},[205],{"type":24,"value":206},"O0：原生态模式，全员正经。",{"type":18,"tag":43,"props":208,"children":209},{},[210],{"type":24,"value":211},"O1：白名单成员摸鱼，其他正经。",{"type":18,"tag":43,"props":213,"children":214},{},[215],{"type":24,"value":216},"O2：黑名单成员正经，其他摸鱼。",{"type":18,"tag":43,"props":218,"children":219},{},[220],{"type":24,"value":221},"O3：彻底躺平，全员低精度（风险自负！）。",{"type":18,"tag":43,"props":223,"children":224},{},[225],{"type":24,"value":113},{"type":18,"tag":43,"props":227,"children":228},{},[229],{"type":24,"value":230},"O3模式好比让会计用计算器上的“近似计算”按钮做账——省时省力，但月底对不上账别哭！",{"type":18,"tag":43,"props":232,"children":233},{},[234],{"type":18,"tag":31,"props":235,"children":236},{},[237],{"type":24,"value":238},"四",{"type":18,"tag":43,"props":240,"children":241},{},[242],{"type":18,"tag":31,"props":243,"children":244},{},[245],{"type":24,"value":246},"动态损失缩放：AI的“自适应安全带”",{"type":18,"tag":43,"props":248,"children":249},{},[250],{"type":24,"value":251},"在build_train_network函数中，动态调整损失缩放系数：",{"type":18,"tag":90,"props":253,"children":255},{"code":254},"\nloss_scale_manager = DynamicLossScaleManager()\nnetwork = TrainOneStepWithLossScaleCell(network, optimizer, loss_scale_manager)\n",[256],{"type":18,"tag":95,"props":257,"children":258},{"__ignoreMap":7},[259],{"type":24,"value":254},{"type":18,"tag":43,"props":261,"children":262},{},[263],{"type":24,"value":264},"原理：低精度计算可能“数值爆炸”，动态缩放就像给训练过程装上安全带——数值稳定时加速，波动大时自动收紧。",{"type":18,"tag":43,"props":266,"children":267},{},[268],{"type":24,"value":269},"源码彩蛋：",{"type":18,"tag":43,"props":271,"children":272},{},[273],{"type":24,"value":274},"如果检测到CPU环境（`context.get_context(\"device_target\") == 
\"CPU\"`），会强制关闭某些优化，毕竟“小马拉大车”容易翻车。",{"type":18,"tag":26,"props":276,"children":278},{"id":277},"_03",[279],{"type":18,"tag":31,"props":280,"children":281},{},[282],{"type":24,"value":283},"# 03",{"type":18,"tag":26,"props":285,"children":287},{"id":286},"自定义秘籍让摸鱼更精准",[288],{"type":18,"tag":31,"props":289,"children":290},{},[291],{"type":24,"value":292},"自定义秘籍：让摸鱼更精准",{"type":18,"tag":43,"props":294,"children":295},{},[296],{"type":24,"value":297},"通过custom_mixed_precision函数，开发者可以自定义名单：",{"type":18,"tag":90,"props":299,"children":301},{"code":300},"# 让Flatten层也加入摸鱼大队\ncustom_white_list = amp.get_white_list()\ncustom_white_list.append(nn.Flatten)\nnet = amp.custom_mixed_precision(net, white_list=custom_white_list)\n",[302],{"type":18,"tag":95,"props":303,"children":304},{"__ignoreMap":7},[305],{"type":24,"value":300},{"type":18,"tag":43,"props":307,"children":308},{},[309],{"type":24,"value":310},"代码逻辑：",{"type":18,"tag":43,"props":312,"children":313},{},[314],{"type":24,"value":315},"- _list_check函数会严格审核自定义名单，防止“混入奇怪的东西”（比如把损失函数加入白名单）。",{"type":18,"tag":43,"props":317,"children":318},{},[319],{"type":24,"value":320},"- 内部警告机制（`logger.warning`）像一位唠叨的HR：“您移除了BatchNorm？确定不会出问题吗？”",{"type":18,"tag":26,"props":322,"children":324},{"id":323},"_04-幕后黑科技符号重写引擎",[325,330,331],{"type":18,"tag":31,"props":326,"children":327},{},[328],{"type":24,"value":329},"# 04",{"type":24,"value":37},{"type":18,"tag":31,"props":332,"children":333},{},[334],{"type":24,"value":335},"幕后黑科技：符号重写引擎",{"type":18,"tag":43,"props":337,"children":338},{},[339],{"type":24,"value":340},"在_auto_mixed_precision_rewrite函数中，MindSpore使用**符号重写（Symbolic Rewriting）**技术：",{"type":18,"tag":90,"props":342,"children":344},{"code":343},"stree = SymbolTree.create(network)  # 将网络转为符号树\n_insert_cast_for_operators(stree)   # 插入类型转换节点\nnew_net = stree.get_network()       # 
重新生成网络\n",[345],{"type":18,"tag":95,"props":346,"children":347},{"__ignoreMap":7},[348],{"type":24,"value":343},{"type":18,"tag":43,"props":350,"children":351},{},[352],{"type":24,"value":353},"这相当于把神经网络拆解成乐高积木，在关键位置插入“转换积木”，再重新拼装。整个过程就像给网络做了一场精密的外科手术。",{"type":18,"tag":43,"props":355,"children":356},{},[357],{"type":24,"value":358},"幽默对比：",{"type":18,"tag":43,"props":360,"children":361},{},[362],{"type":24,"value":363},"传统框架的混合精度实现像“手动挡汽车”，需要开发者自己换挡；MindSpore的符号重写则是“自动驾驶”——你设定目的地，它自动选择最优路径。",{"type":18,"tag":26,"props":365,"children":367},{"id":366},"_05-性能实测速度与精度的博弈",[368,373,374],{"type":18,"tag":31,"props":369,"children":370},{},[371],{"type":24,"value":372},"# 05",{"type":24,"value":37},{"type":18,"tag":31,"props":375,"children":376},{},[377],{"type":24,"value":378},"性能实测：速度与精度的博弈",{"type":18,"tag":43,"props":380,"children":381},{},[382],{"type":24,"value":383},"根据官方测试，混合精度训练在不同场景下的表现：",{"type":18,"tag":43,"props":385,"children":386},{},[387],{"type":24,"value":388},"- CV模型（如ResNet50）：速度提升1.5~2倍，精度损失\u003C0.5%",{"type":18,"tag":43,"props":390,"children":391},{},[392],{"type":24,"value":393},"- NLP模型（如BERT）：速度提升1.2~1.8倍，需谨慎调整黑名单",{"type":18,"tag":43,"props":395,"children":396},{},[397],{"type":24,"value":398},"避坑指南：",{"type":18,"tag":43,"props":400,"children":401},{},[402],{"type":24,"value":403},"- 如果遇到NaN（数值爆炸），尝试调整keep_batchnorm_fp32选项。",{"type":18,"tag":43,"props":405,"children":406},{},[407],{"type":24,"value":408},"- 模型部署时若出现精度不符，检查是否漏掉了某层的类型转换。",{"type":18,"tag":26,"props":410,"children":412},{"id":411},"_06-总结让ai学会聪明地偷懒",[413,418,419],{"type":18,"tag":31,"props":414,"children":415},{},[416],{"type":24,"value":417},"# 06",{"type":24,"value":37},{"type":18,"tag":31,"props":420,"children":421},{},[422],{"type":24,"value":423},"总结：让AI学会“聪明地偷懒”",{"type":18,"tag":43,"props":425,"children":426},{},[427],{"type":24,"value":428},"通过解剖混合精度模块，我们看到了MindSpore混合精度训练的四大核心设计：",{"type":18,"tag":43,"props":430,"children":431},{},[432],{"type":24,"value":433},"1. 
名单管理：区分“可摸鱼”和“必须严谨”的组件。",{"type":18,"tag":43,"props":435,"children":436},{},[437],{"type":24,"value":438},"2. 动态插桩：通过符号重写自动插入类型转换。",{"type":18,"tag":43,"props":440,"children":441},{},[442],{"type":24,"value":443},"3. 策略分级：从保守到激进的四档优化。",{"type":18,"tag":43,"props":445,"children":446},{},[447],{"type":24,"value":448},"4. 损失缩放：自适应保护训练稳定性。",{"type":18,"tag":43,"props":450,"children":451},{},[452],{"type":24,"value":453},"正如程序员界的名言：“懒惰是美德”，但懒惰必须聪明。混合精度不是无脑砍精度，而是在速度与精度间找到精妙平衡——而这，正是MindSpore设计哲学的体现。",{"type":18,"tag":43,"props":455,"children":456},{},[457],{"type":24,"value":458},"最后友情提示：摸鱼虽好，可不要贪杯哦！（O3模式慎用）",{"title":7,"searchDepth":460,"depth":460,"links":461},4,[462,465,467,468,469,471,473],{"id":28,"depth":463,"text":464},3,"# 01 混合精度：AI的“选择性摸鱼”艺术",{"id":55,"depth":463,"text":466},"# 02 源码解剖室：四大核心武器",{"id":277,"depth":463,"text":283},{"id":286,"depth":463,"text":292},{"id":323,"depth":463,"text":470},"# 04 幕后黑科技：符号重写引擎",{"id":366,"depth":463,"text":472},"# 05 性能实测：速度与精度的博弈",{"id":411,"depth":463,"text":474},"# 06 总结：让AI学会“聪明地偷懒”","markdown","content:technology-blogs:zh:3706.md","content","technology-blogs/zh/3706.md","technology-blogs/zh/3706","md",1776506133402]