[{"data":1,"prerenderedAt":332},["ShallowReactive",2],{"content-query-UwJ0S2sxY2":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":326,"_id":327,"_source":328,"_file":329,"_stem":330,"_extension":331},"/technology-blogs/zh/3401","zh",false,"","PreDiff：基于潜在扩散模型的降水短时预报","作者：于璠 来源：知乎","2024-09-25","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/11/28/10ca72875aad4615a0aa2d8240279f3a.png","technology-blogs","大V博文",{"type":15,"children":16,"toc":320},"root",[17,25,44,52,57,62,70,78,86,91,99,104,109,116,121,129,134,139,144,151,156,161,169,174,179,186,191,196,206,213,218,223,228,233,238,243,251,259,264,269,276,281,289,297,302,310,315],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"prediff基于潜在扩散模型的降水短时预报",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":28},"p",{},[29,31,37,39],{"type":24,"value":30},"作者：",{"type":18,"tag":32,"props":33,"children":34},"strong",{},[35],{"type":24,"value":36},"于璠",{"type":24,"value":38}," ",{"type":18,"tag":32,"props":40,"children":41},{},[42],{"type":24,"value":43},"来源：知乎",{"type":18,"tag":26,"props":45,"children":46},{},[47],{"type":18,"tag":32,"props":48,"children":49},{},[50],{"type":24,"value":51},"背景",{"type":18,"tag":26,"props":53,"children":54},{},[55],{"type":24,"value":56},"传统的天气预报技术依赖于复杂的物理模型，这些模型不仅计算成本高昂，还要求深厚的专业知识支撑。然而，近十年来，随着地球时空观测数据的爆炸式增长，深度学习技术为构建数据驱动的预测模型开辟了新的道路。虽然这些模型在多种地球系统预测任务中展现出巨大潜力，但它们在管理不确定性和整合特定领域先验知识方面仍有不足，时常导致预测结果模糊不清或在物理上不可信。",{"type":18,"tag":26,"props":58,"children":59},{},[60],{"type":24,"value":61},"为克服这些难题，来自香港科技大学的Gao 
Zhihan实现了PreDiff模型，创新性地提出了一种两阶段流程，专门用于实现概率性的时空预测。该流程融合了条件潜在扩散模型与显式的知识对齐机制，旨在生成既符合特定领域物理约束，又能精确捕捉时空变化的预测结果。通过这种方法，我们期望能够显著提升地球系统预测的准确性和可靠性。",{"type":18,"tag":26,"props":63,"children":64},{},[65],{"type":18,"tag":32,"props":66,"children":67},{},[68],{"type":24,"value":69},"01",{"type":18,"tag":26,"props":71,"children":72},{},[73],{"type":18,"tag":32,"props":74,"children":75},{},[76],{"type":24,"value":77},"模型介绍",{"type":18,"tag":26,"props":79,"children":80},{},[81],{"type":18,"tag":32,"props":82,"children":83},{},[84],{"type":24,"value":85},"1.1 扩散模型",{"type":18,"tag":26,"props":87,"children":88},{},[89],{"type":24,"value":90},"扩散模型通过反转预先定义的破坏原始数据的加噪过程来学习数据分布。加噪过程：",{"type":18,"tag":26,"props":92,"children":93},{},[94],{"type":18,"tag":95,"props":96,"children":98},"img",{"alt":7,"src":97},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/10/25/2b0da561c4ac43d8af1e4faef9390fb9.png",[],{"type":18,"tag":26,"props":100,"children":101},{},[102],{"type":24,"value":103},"公式1",{"type":18,"tag":26,"props":105,"children":106},{},[107],{"type":24,"value":108},"其中 x0∼p(x) 是真实数据，xT∼N(0,I)是随机噪声，系数αt遵循固定的schedule。为了将DM应用于时空预测，p(x | y)被因式分解和参数化：",{"type":18,"tag":26,"props":110,"children":111},{},[112],{"type":18,"tag":95,"props":113,"children":115},{"alt":7,"src":114},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/10/25/490ba633201c4ffeb8cf5b9e05897ccf.png",[],{"type":18,"tag":26,"props":117,"children":118},{},[119],{"type":24,"value":120},"公式2",{"type":18,"tag":26,"props":122,"children":123},{},[124],{"type":18,"tag":32,"props":125,"children":126},{},[127],{"type":24,"value":128},"1.2 潜在空间的条件扩散",{"type":18,"tag":26,"props":130,"children":131},{},[132],{"type":24,"value":133},"为了提高扩散模型训练和推理的计算效率，PreDiff采用两阶段训练过程，利用低维潜在空间表示的优势。第一阶段是训练逐帧变分自编码器（VAE），第二阶段训练一个条件扩散模型，在潜在空间中生成预测结果。",{"type":18,"tag":26,"props":135,"children":136},{},[137],{"type":24,"value":138},"Frame-wise autoencoder：在训练中用了pixel-wise的L2 Loss和一个对抗损失，在训练中排除了perceptual 
loss因为没有标准的预训练模型来感知地球观测数据。其中encoder E被训练用于生成潜在表示，decoder D学习从编码的潜在空间中重构数据。",{"type":18,"tag":26,"props":140,"children":141},{},[142],{"type":24,"value":143},"Latent diffusion：PreDiff的训练objective为：",{"type":18,"tag":26,"props":145,"children":146},{},[147],{"type":18,"tag":95,"props":148,"children":150},{"alt":7,"src":149},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/10/25/583e04576e19428aa8d2c26275bb1436.png",[],{"type":18,"tag":26,"props":152,"children":153},{},[154],{"type":24,"value":155},"公式3",{"type":18,"tag":26,"props":157,"children":158},{},[159],{"type":24,"value":160},"与图像相比，降水临近预报中的时空观测数据建模由于其维度更高，因此挑战更大。为此，PreDiff将LDM中的UNet替换为Earthformer-UNet。Earthformer-UNet采用了UNet构架和cuboid attention，去除了Earthformer中连接encoder和decoder的cross-attention结构。",{"type":18,"tag":26,"props":162,"children":163},{},[164],{"type":18,"tag":32,"props":165,"children":166},{},[167],{"type":24,"value":168},"1.3 融合知识控制",{"type":18,"tag":26,"props":170,"children":171},{},[172],{"type":24,"value":173},"尽管扩散模型在生成多样化和现实风格上很有潜力，但生成的预测可能违反了物理约束，或者无视了特定领域的专业知识，从而无法给出合理的结果。一个可能的原因是扩散模型不一定在和领域知识适配的数据上进行训练。为了解决这个问题，提出了knowledge control来融入辅助的先验知识。knowledge control在预测结果上施加了一个限制。knowledge control的目的在于抑制产生违规预测的概率，但是由于数据收集和模拟中的噪声，即便是训练数据中的目标值也可能违反knowledge control。",{"type":18,"tag":26,"props":175,"children":176},{},[177],{"type":24,"value":178},"通过训练一个神经网络来实现knowledge control，从中间的潜在变量zt开始。核心在于调整每一个潜在空间去噪步骤中的概率转移矩阵，来降低采样值zt违反限制的概率。",{"type":18,"tag":26,"props":180,"children":181},{},[182],{"type":18,"tag":95,"props":183,"children":185},{"alt":7,"src":184},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/10/25/34b89dc0f9fa411d947395cab9238f4b.png",[],{"type":18,"tag":26,"props":187,"children":188},{},[189],{"type":24,"value":190},"公式4",{"type":18,"tag":26,"props":192,"children":193},{},[194],{"type":24,"value":195},"其中λF是引导缩放因子。这个训练过程独立于LDM的训练，在推理的时候，knowledge control被用作plug-in模块，这个模块化的方法允许训练轻量型的knowledge 
control网络来施加不同的限制，而不需要重新训练整个网络。",{"type":18,"tag":197,"props":198,"children":200},"h2",{"id":199},"_14推理流程",[201],{"type":18,"tag":32,"props":202,"children":203},{},[204],{"type":24,"value":205},"1.4推理流程",{"type":18,"tag":26,"props":207,"children":208},{},[209],{"type":18,"tag":95,"props":210,"children":212},{"alt":7,"src":211},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/10/25/8df3245f9e6d4fe7a5eac8654784a1c6.png",[],{"type":18,"tag":26,"props":214,"children":215},{},[216],{"type":24,"value":217},"图1 网络架构",{"type":18,"tag":26,"props":219,"children":220},{},[221],{"type":24,"value":222},"首先，一个包含多个时间步的观测序列y通过逐帧编码器ℰ被编码成一个潜在上下文z_cond。这个潜在上下文捕捉了观测数据中的关键信息，为后续步骤提供了必要的指导。",{"type":18,"tag":26,"props":224,"children":225},{},[226],{"type":24,"value":227},"接下来，利用这个潜在上下文z_cond，我们引入了一个潜在扩散模型p_θ(z_t|z_{t+1}, z_cond)，其中z_t表示在时间步t的潜在变量。该模型基于Earthformer的核心神经网络，构建了UNet-style架构，以有效捕获数据中的长期复杂变化。",{"type":18,"tag":26,"props":229,"children":230},{},[231],{"type":24,"value":232},"在扩散模型的逆向过程中，从时间步T（通常是最嘈杂的潜在表示）开始，通过一系列的去噪步骤逐渐逼近初始观测的潜在表示。在每一步去噪过程中，我们引入了一个关键的知识对齐机制。这个机制通过训练一个知识对齐网络A来实现：该网络参数化一个能量函数，用于在每一步去噪时调整转换概率，以确保生成的潜在状态不仅符合数据分布，还符合领域特定的物理约束。",{"type":18,"tag":26,"props":234,"children":235},{},[236],{"type":24,"value":237},"具体地，在每一步去噪时，我们估计当前潜在状态与给定物理约束之间的偏差，并根据这个偏差调整潜在变量的转换分布。通过这种方式，我们能够抑制那些可能违反物理定律的潜在状态，同时鼓励生成物理上合理的中间潜在状态。",{"type":18,"tag":26,"props":239,"children":240},{},[241],{"type":24,"value":242},"最终，当去噪过程完成时，我们得到了一个接近初始观测但包含未来预测信息的潜在表示。通过解码器D，我们可以将这个潜在表示转换回预测的未来序列x，这个预测序列既包含了观测数据中的关键信息，又符合领域特定的物理约束。",{"type":18,"tag":26,"props":244,"children":245},{},[246],{"type":18,"tag":32,"props":247,"children":248},{},[249],{"type":24,"value":250},"02",{"type":18,"tag":26,"props":252,"children":253},{},[254],{"type":18,"tag":32,"props":255,"children":256},{},[257],{"type":24,"value":258},"结果",{"type":18,"tag":26,"props":260,"children":261},{},[262],{"type":24,"value":263},"我们评估了 PreDiff 在 SEVIR 上预测近期降水强度（“短临预报”）的任务。我们使用预期降水强度作为knowledge 
control来模拟可能的极端天气事件，如暴雨和干旱。",{"type":18,"tag":26,"props":265,"children":266},{},[267],{"type":24,"value":268},"我们发现，具有预期未来降水强度的knowledge control可以有效地指导生成，同时保持保真度和对真实数据分布的依从性。例如，下图的第三行模拟了未来平均强度超过μτ + 4στ的极端情况下天气的发展情况（概率约为 0.35%）。这种模拟对于估计极端暴雨情况下的潜在损失很有价值。",{"type":18,"tag":26,"props":270,"children":271},{},[272],{"type":18,"tag":95,"props":273,"children":275},{"alt":7,"src":274},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/10/25/2e58df5a74504f0aac8a1d76419fa8b0.png",[],{"type":18,"tag":26,"props":277,"children":278},{},[279],{"type":24,"value":280},"图2 PreDiff在短临预报中的表现",{"type":18,"tag":26,"props":282,"children":283},{},[284],{"type":18,"tag":32,"props":285,"children":286},{},[287],{"type":24,"value":288},"03",{"type":18,"tag":26,"props":290,"children":291},{},[292],{"type":18,"tag":32,"props":293,"children":294},{},[295],{"type":24,"value":296},"感想与启发",{"type":18,"tag":26,"props":298,"children":299},{},[300],{"type":24,"value":301},"PreDiff通过引入条件潜变量扩散模型并结合显式的知识对齐机制，展示了在地球系统预测中融合数据驱动与物理模型的新思路。这一创新不仅强调了数据在模型训练中的重要性，还通过物理约束的引入，提高了预测的物理合理性和准确性。这启发我们，在应对复杂系统的预测任务时，不应仅依赖于单一的数据驱动或物理模型方法，也应该探索如何将两者有机结合，充分利用各自的优势。同时，PreDiff的实现也提醒我们，对于复杂物理过程的可微数值求解算法，其实现与嵌入到AI模型中参与训练的技术路线，是未来提升预测精度的重要方向之一。",{"type":18,"tag":197,"props":303,"children":305},{"id":304},"参考文献",[306],{"type":18,"tag":32,"props":307,"children":308},{},[309],{"type":24,"value":304},{"type":18,"tag":26,"props":311,"children":312},{},[313],{"type":24,"value":314},"[1] Gao Z, Shi X, Han B, et al. PreDiff: Precipitation nowcasting with latent diffusion models[J]. 
Advances in Neural Information Processing Systems, 2024, 36.",{"type":18,"tag":26,"props":316,"children":317},{},[318],{"type":24,"value":319},"[2] gaozhihan/PreDiff: [NeurIPS 2023] Official implementation of “PreDiff: Precipitation Nowcasting with Latent Diffusion Models” (github.com)",{"title":7,"searchDepth":321,"depth":321,"links":322},4,[323,325],{"id":199,"depth":324,"text":205},2,{"id":304,"depth":324,"text":304},"markdown","content:technology-blogs:zh:3401.md","content","technology-blogs/zh/3401.md","technology-blogs/zh/3401","md",1776506129494]