[{"data":1,"prerenderedAt":385},["ShallowReactive",2],{"content-query-T0EaMMNskO":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":379,"_id":380,"_source":381,"_file":382,"_stem":383,"_extension":384},"/technology-blogs/zh/3749","zh",false,"","扩散模型系列——DDIM","DDIM   论文地址：Denoising Diffusion Implicit Models    代码地址：https://github.com/ermongroup/ddim","2025-05-07","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/06/06/4b233716ac074fc08408dbde4fa39997.png","technology-blogs","开发者说",{"type":15,"children":16,"toc":357},"root",[17,25,32,47,58,65,70,85,90,109,115,122,127,156,162,167,210,216,221,226,231,237,243,248,253,258,264,269,274,279,285,290,296,314,320,342,348],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"扩散模型系列ddim",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":29},"h2",{"id":28},"ddim",[30],{"type":24,"value":31},"DDIM",{"type":18,"tag":33,"props":34,"children":35},"p",{},[36,38],{"type":24,"value":37},"论文地址：",{"type":18,"tag":39,"props":40,"children":44},"a",{"href":41,"rel":42},"https://arxiv.org/pdf/2010.02502",[43],"nofollow",[45],{"type":24,"value":46},"Denoising Diffusion Implicit Models",{"type":18,"tag":33,"props":48,"children":49},{},[50,52],{"type":24,"value":51},"代码地址：",{"type":18,"tag":39,"props":53,"children":56},{"href":54,"rel":55},"https://github.com/ermongroup/ddim",[43],[57],{"type":24,"value":54},{"type":18,"tag":59,"props":60,"children":62},"h3",{"id":61},"一概述",[63],{"type":24,"value":64},"一、概述",{"type":18,"tag":33,"props":66,"children":67},{},[68],{"type":24,"value":69},"DDIM 是基于 DDPM 改进的迭代隐式概率扩散模型，核心目标是在保持生成质量的同时加速采样过程。通过引入非马尔可夫扩散过程和确定性采样机制，DDIM 
允许在去噪时跳过部分时间步，可以显著减少计算量。其核心创新在于：",{"type":18,"tag":71,"props":72,"children":73},"ol",{},[74,80],{"type":18,"tag":75,"props":76,"children":77},"li",{},[78],{"type":24,"value":79},"可调方差参数：通过控制反向过程的随机性，实现从完全随机（DDPM）到完全确定（无噪声）的采样模式；",{"type":18,"tag":75,"props":81,"children":82},{},[83],{"type":24,"value":84},"跳跃式采样：无需遍历所有时间步，可直接在预设的关键时间点之间跳转，大幅提升生成速度。",{"type":18,"tag":33,"props":86,"children":87},{},[88],{"type":24,"value":89},"DDIM 的主要特点包括：",{"type":18,"tag":91,"props":92,"children":93},"ul",{},[94,99,104],{"type":18,"tag":75,"props":95,"children":96},{},[97],{"type":24,"value":98},"非马尔可夫过程：打破 DDPM 的严格马尔可夫链限制，允许当前状态依赖任意历史状态；",{"type":18,"tag":75,"props":100,"children":101},{},[102],{"type":24,"value":103},"确定性采样：通过设置方差为 0，消除采样过程的随机性，提升生成稳定性；",{"type":18,"tag":75,"props":105,"children":106},{},[107],{"type":24,"value":108},"采样效率：支持“跳步”采样，在 10-50 步内即可生成高质量样本（DDPM 需 1000 步）。",{"type":18,"tag":59,"props":110,"children":112},{"id":111},"二主要步骤",[113],{"type":24,"value":114},"二、主要步骤",{"type":18,"tag":116,"props":117,"children":119},"h4",{"id":118},"_1-正向扩散过程",[120],{"type":24,"value":121},"1. 
正向扩散过程",{"type":18,"tag":33,"props":123,"children":124},{},[125],{"type":24,"value":126},"DDIM 的正向扩散过程与 DDPM 一致，都是为了在每个时间步 t 中，逐渐增加噪声的比例，将原始数据 x_0转变为带有噪声的数据 x_t = \\sqrt{\\bar{\\alpha}_t} x_0 + \\sqrt{1 - \\bar{\\alpha}_t} \\epsilon，其中 \\bar{\\alpha}_t是累积扩散系数（定义见下文边际分布），\\epsilon是标准正态分布的噪声。",{"type":18,"tag":91,"props":128,"children":129},{},[130,141],{"type":18,"tag":75,"props":131,"children":132},{},[133,135,139],{"type":24,"value":134},"单步扩散：",{"type":18,"tag":136,"props":137,"children":138},"br",{},[],{"type":24,"value":140},"q(x_t \\mid x_{t-1}) = \\mathcal{N}\\left(x_t; \\sqrt{1-\\beta_t}x_{t-1}, \\beta_t \\mathbf{I}\\right)",{"type":18,"tag":75,"props":142,"children":143},{},[144,146,149,151,154],{"type":24,"value":145},"边际分布：",{"type":18,"tag":136,"props":147,"children":148},{},[],{"type":24,"value":150},"x_t = \\sqrt{\\bar{\\alpha}_t} x_0 + \\sqrt{1 - \\bar{\\alpha}_t} \\epsilon, \\quad \\bar{\\alpha}_t = \\prod_{s=1}^t \\alpha_s, \\, \\alpha_s = 1 - \\beta_s",{"type":18,"tag":136,"props":152,"children":153},{},[],{"type":24,"value":155},"其中 \\beta_t是随时间递增的方差调度（如线性调度 \\beta_t = \\beta_{\\text{start}} + t*(\\beta_{\\text{end}} - \\beta_{\\text{start}})/T）。\\mathcal{N}(\\cdot; \\mu, \\sigma^2 I)表示均值为 \\mu，协方差矩阵为 \\sigma^2 I的多元高斯分布。",{"type":18,"tag":116,"props":157,"children":159},{"id":158},"_2-反向去噪过程",[160],{"type":24,"value":161},"2. 
反向去噪过程",{"type":18,"tag":33,"props":163,"children":164},{},[165],{"type":24,"value":166},"DDIM 的反向去噪过程与 DDPM 不同，它在不利用马尔可夫假设的情况下推导出了 diffusion 的反向过程，最终可以实现仅采样 20 ～ 100 步的情况下达到和 DDPM 采样 1000 步相近的生成效果。",{"type":18,"tag":91,"props":168,"children":169},{},[170,195],{"type":18,"tag":75,"props":171,"children":172},{},[173,175,178,180,183,185,188,190,193],{"type":24,"value":174},"条件分布重构： DDIM 重新定义反向过程为带可调方差的高斯分布：",{"type":18,"tag":136,"props":176,"children":177},{},[],{"type":24,"value":179},"p_\\theta(x_{t-1} \\mid x_t) = \\mathcal{N}\\left(x_{t-1}; \\tilde{\\mu}_t(x_t, \\epsilon_\\theta), \\sigma_t^2 \\mathbf{I}\\right)",{"type":18,"tag":136,"props":181,"children":182},{},[],{"type":24,"value":184},"其中均值 \\tilde{\\mu}_t由神经网络预测的噪声 \\epsilon_\\theta(x_t, t)推导：",{"type":18,"tag":136,"props":186,"children":187},{},[],{"type":24,"value":189},"\\tilde{\\mu}_t = \\sqrt{\\bar{\\alpha}_{t-1}} \\cdot \\hat{x}_0 + \\sqrt{1 - \\bar{\\alpha}_{t-1} - \\sigma_t^2} \\cdot \\frac{x_t - \\sqrt{\\bar{\\alpha}_t} \\hat{x}_0}{\\sqrt{1 - \\bar{\\alpha}_t}}",{"type":18,"tag":136,"props":191,"children":192},{},[],{"type":24,"value":194},"这里 \\hat{x}_0 = \\frac{x_t - \\sqrt{1 - \\bar{\\alpha}_t} \\epsilon_\\theta}{\\sqrt{\\bar{\\alpha}_t}}是对原始数据的估计。",{"type":18,"tag":75,"props":196,"children":197},{},[198,200,203,205,208],{"type":24,"value":199},"确定性采样（σₜ=0）： 当方差 \\sigma_t = 0时，采样过程完全确定，无需添加随机噪声：",{"type":18,"tag":136,"props":201,"children":202},{},[],{"type":24,"value":204},"x_{t-1} = \\sqrt{\\bar{\\alpha}_{t-1}} \\cdot \\hat{x}_0 + \\sqrt{1 - \\bar{\\alpha}_{t-1}} \\cdot \\epsilon_\\theta(x_t, t)",{"type":18,"tag":136,"props":206,"children":207},{},[],{"type":24,"value":209},"与 DDPM 相比，DDIM 通过设定方差参数 \\sigma_t=0，生成过程完全确定，消除随机性干扰。",{"type":18,"tag":116,"props":211,"children":213},{"id":212},"_3-跳跃式采样",[214],{"type":24,"value":215},"3. 
跳跃式采样",{"type":18,"tag":33,"props":217,"children":218},{},[219],{"type":24,"value":220},"跳跃式采样是 DDIM 所采取的最关键的核心优化，它允许 DDIM 在采样时跳过一些中间时间步，加快采样速度。形式化地来说，DDPM 的采样时间步应当是 [T, T-1, ..., 2, 1]，而 DDIM 可以直接从其中抽取一个子序列 [T_s, T_{s-1}, ..., T_2, T_1]进行采样，此时只需递归应用公式：",{"type":18,"tag":33,"props":222,"children":223},{},[224],{"type":24,"value":225},"x_s = \\sqrt{\\bar{\\alpha}_s} \\cdot \\hat{x}_0 + \\sqrt{1 - \\bar{\\alpha}_s} \\cdot \\epsilon_\\theta(x_t, t)",{"type":18,"tag":33,"props":227,"children":228},{},[229],{"type":24,"value":230},"通过预设时间步子集（如每隔 10 步采样一次），可在大幅减少计算量的同时保持生成质量。",{"type":18,"tag":59,"props":232,"children":234},{"id":233},"三数学理论",[235],{"type":24,"value":236},"三、数学理论",{"type":18,"tag":116,"props":238,"children":240},{"id":239},"_1-反向过程的条件分布",[241],{"type":24,"value":242},"1. 反向过程的条件分布",{"type":18,"tag":33,"props":244,"children":245},{},[246],{"type":24,"value":247},"DDIM 通过引入可调方差参数 \\sigma_t，将反向过程的条件分布扩展为：",{"type":18,"tag":33,"props":249,"children":250},{},[251],{"type":24,"value":252},"q_\\sigma(x_{t-1} \\mid x_t, x_0) = \\mathcal{N}\\left(x_{t-1}; \\sqrt{\\bar{\\alpha}_{t-1}} x_0 + \\sqrt{1 - \\bar{\\alpha}_{t-1} - \\sigma_t^2} \\cdot \\frac{x_t - \\sqrt{\\bar{\\alpha}_t} x_0}{\\sqrt{1 - \\bar{\\alpha}_t}} , \\sigma_t^2 \\mathbf{I}\\right)",{"type":18,"tag":33,"props":254,"children":255},{},[256],{"type":24,"value":257},"当 \\sigma_t = \\sqrt{\\beta_t}时，退化为 DDPM 的马尔可夫采样；当 \\sigma_t = 0时，采样过程完全确定。",{"type":18,"tag":116,"props":259,"children":261},{"id":260},"_2-变分下界简化",[262],{"type":24,"value":263},"2. 
变分下界简化",{"type":18,"tag":33,"props":265,"children":266},{},[267],{"type":24,"value":268},"DDIM 的训练目标与 DDPM 一致，都是最小化噪声预测损失，但 DDIM 通过非马尔可夫设计简化了变分下界：",{"type":18,"tag":33,"props":270,"children":271},{},[272],{"type":24,"value":273},"L_{\\text{DDIM}} = \\mathbb{E}_{q} \\left[ \\sum_{t=2}^{T} \\omega_t \\cdot D_{\\text{KL}}\\left(q(x_{t-1} \\mid x_t, x_0) \\parallel p_\\theta(x_{t-1} \\mid x_t)\\right) \\right]",{"type":18,"tag":33,"props":275,"children":276},{},[277],{"type":24,"value":278},"其中权重 \\omega_t由 \\sigma_t决定，且省略了 DDPM 中与边界条件相关的项（如 D_{\\text{KL}}(q(x_T \\mid x_0) \\parallel p(x_T))），大幅降低计算复杂度。D_{\\text{KL}}则是两个高斯分布的 KL 散度。",{"type":18,"tag":59,"props":280,"children":282},{"id":281},"四模型结构",[283],{"type":24,"value":284},"四、模型结构",{"type":18,"tag":33,"props":286,"children":287},{},[288],{"type":24,"value":289},"DDIM 沿用 DDPM 的 U-Net 架构作为主干网络，包含对称的编码器-解码器路径和跳跃连接，但针对采样效率进行了轻量化调整：",{"type":18,"tag":116,"props":291,"children":293},{"id":292},"_1-网络设计细节",[294],{"type":24,"value":295},"1. 网络设计细节",{"type":18,"tag":91,"props":297,"children":298},{},[299,304,309],{"type":18,"tag":75,"props":300,"children":301},{},[302],{"type":24,"value":303},"归一化与激活：使用 GroupNorm 替代 BatchNorm 用以提升小批量训练稳定性，使用 SiLU 激活函数替代 ReLU，增强非线性建模能力；",{"type":18,"tag":75,"props":305,"children":306},{},[307],{"type":24,"value":308},"时间嵌入：将时间步 t编码为高维向量（如正弦编码或可学习嵌入），通过线性层与各层特征融合；",{"type":18,"tag":75,"props":310,"children":311},{},[312],{"type":24,"value":313},"跳跃连接：保留原来的编码器-解码器的多尺度特征融合，确保细节恢复能力。",{"type":18,"tag":116,"props":315,"children":317},{"id":316},"_2-关键模块对比",[318],{"type":24,"value":319},"2. 
关键模块对比",{"type":18,"tag":91,"props":321,"children":322},{},[323,337],{"type":18,"tag":75,"props":324,"children":325},{},[326,328,335],{"type":24,"value":327},"采样层：DDIM 的",{"type":18,"tag":329,"props":330,"children":332},"code",{"className":331},[],[333],{"type":24,"value":334},"p_sample",{"type":24,"value":336},"方法通过判断σ_t是否为 0，决定是否添加随机噪声，默认σ_t=0 时为纯确定性计算；",{"type":18,"tag":75,"props":338,"children":339},{},[340],{"type":24,"value":341},"时间步处理：支持任意时间步跳转，无需按顺序遍历，通过预设的时间步列表（如[T_s, T_{s-1}, ..., T_2, T_1]）实现跳步采样。",{"type":18,"tag":59,"props":343,"children":345},{"id":344},"五代码实现",[346],{"type":24,"value":347},"五、代码实现",{"type":18,"tag":349,"props":350,"children":352},"pre",{"code":351},"# 核心采样逻辑\nclass DDIM(nn.Cell):\n    \"\"\"DDIM核心类，实现跳跃式确定性采样\"\"\"\n    def __init__(self, model, betas, T=1000, sample_steps=50):\n        super().__init__()\n        self.model = model  # U-Net网络\n        self.T = T          # 总时间步\n        self.sample_steps = sample_steps  # 采样时使用的跳步步长\n        self.betas = betas\n        self.alphas = 1. 
- betas\n        self.alpha_bars = np.cumprod(self.alphas)\n\n        # 生成跳步时间序列（如从T到0，每隔T/sample_steps步取一个点）\n        self.sampling_timesteps = np.linspace(0, T-1, sample_steps, dtype=np.int64)[::-1]\n\n    def p_sample(self, x, t):\n        \"\"\"确定性去噪单步（σ=0）\"\"\"\n        alpha = self.alphas[t]\n        alpha_bar = self.alpha_bars[t]\n        sqrt_alpha = ops.sqrt(alpha)\n        sqrt_one_minus_alpha = ops.sqrt(1 - alpha)\n\n        # 预测噪声并估计原始数据\n        pred_noise = self.model(x, t)\n        pred_x0 = (x - sqrt_one_minus_alpha * pred_noise) / sqrt_alpha\n\n        # DDIM确定性采样公式\n        alpha_bar_prev = self.alpha_bars[t-1] if t > 0 else 1.0\n        sqrt_alpha_bar_prev = ops.sqrt(alpha_bar_prev)\n        sqrt_one_minus_alpha_bar_prev = ops.sqrt(1 - alpha_bar_prev)\n\n        x_prev = sqrt_alpha_bar_prev * pred_x0 + sqrt_one_minus_alpha_bar_prev * pred_noise\n        return x_prev\n\n    def construct(self, x):\n        \"\"\"跳步采样过程（从x_T到x_0）\"\"\"\n        for t in self.sampling_timesteps:\n            x = self.p_sample(x, t)\n        return x\n\n# U-Net 改进\nclass UNet(nn.Cell):\n    \"\"\"带GroupNorm和SiLU的轻量化U-Net\"\"\"\n    def __init__(self, in_channels=3, channel_dim=128):\n        super().__init__()\n        self.time_embed = nn.SequentialCell(\n            nn.Embedding(1000, channel_dim),\n            nn.SiLU(),\n            nn.Dense(channel_dim, channel_dim * 4)\n        )\n\n        self.down = nn.SequentialCell(\n            nn.Conv2d(in_channels, channel_dim, 3, padding=1),\n            nn.GroupNorm(32, channel_dim),\n            nn.SiLU(),\n            nn.Conv2d(channel_dim, channel_dim * 2, 3, padding=1, stride=2),\n            nn.GroupNorm(32, channel_dim * 2),\n            nn.SiLU()\n        )\n\n        self.up = nn.SequentialCell(\n            nn.Conv2dTranspose(channel_dim * 2, channel_dim, 3, stride=2, padding=1),\n            nn.GroupNorm(32, channel_dim),\n            nn.SiLU(),\n            nn.Conv2d(channel_dim, in_channels, 3, 
padding=1),\n            nn.Tanh()\n        )\n\n    def construct(self, x, t):\n        t_emb = self.time_embed(t)\n        h = self.down(x) + t_emb.view(-1, h.shape[1], 1, 1)\n        return self.up(h)\n",[353],{"type":18,"tag":329,"props":354,"children":355},{"__ignoreMap":7},[356],{"type":24,"value":351},{"title":7,"searchDepth":358,"depth":358,"links":359},4,[360],{"id":28,"depth":361,"text":31,"children":362},2,[363,365,370,374,378],{"id":61,"depth":364,"text":64},3,{"id":111,"depth":364,"text":114,"children":366},[367,368,369],{"id":118,"depth":358,"text":121},{"id":158,"depth":358,"text":161},{"id":212,"depth":358,"text":215},{"id":233,"depth":364,"text":236,"children":371},[372,373],{"id":239,"depth":358,"text":242},{"id":260,"depth":358,"text":263},{"id":281,"depth":364,"text":284,"children":375},[376,377],{"id":292,"depth":358,"text":295},{"id":316,"depth":358,"text":319},{"id":344,"depth":364,"text":347},"markdown","content:technology-blogs:zh:3749.md","content","technology-blogs/zh/3749.md","technology-blogs/zh/3749","md",1776506134412]