[{"data":1,"prerenderedAt":374},["ShallowReactive",2],{"content-query-EI5w56EzG1":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":368,"_id":369,"_source":370,"_file":371,"_stem":372,"_extension":373},"/technology-blogs/zh/3001","zh",false,"","MindSpore AI科学计算系列 | 生成扩散模型DiffDock显著提升分子对接准确率","作者：于璠 来源：知乎","2024-02-26","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/01/3acea974ea134481a8334dda741bf95c.png","technology-blogs",{"type":14,"children":15,"toc":363},"root",[16,24,43,51,65,70,75,80,85,90,95,100,110,115,123,128,142,147,152,157,173,178,185,190,195,213,218,228,233,240,245,250,257,262,270,275,280,285,290,295,304,309,314,319,330,341,352],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"mindspore-ai科学计算系列-生成扩散模型diffdock显著提升分子对接准确率",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28,30,36,38],{"type":23,"value":29},"**作者：**",{"type":17,"tag":31,"props":32,"children":33},"strong",{},[34],{"type":23,"value":35},"于璠",{"type":23,"value":37}," ",{"type":17,"tag":31,"props":39,"children":40},{},[41],{"type":23,"value":42},"来源：知乎",{"type":17,"tag":25,"props":44,"children":45},{},[46],{"type":17,"tag":31,"props":47,"children":48},{},[49],{"type":23,"value":50},"背景",{"type":17,"tag":25,"props":52,"children":53},{},[54,56],{"type":23,"value":55},"关于药物发现中的分子对接（molecular 
docking）任务，在此前的一篇专栏文章当中做过一些基本的介绍（",{"type":17,"tag":57,"props":58,"children":62},"a",{"href":59,"rel":60},"https://zhuanlan.zhihu.com/p/492204441",[61],"nofollow",[63],{"type":23,"value":64},"https://zhuanlan.zhihu.com/p/492204441）。分子对接过去长期被认为是一个搜索任务，例如分子之间的空间识别常采用格点计算、片断生长等方法，而能量计算则使用模拟退火、遗传算法等方法。随着深度学习的兴起，分子对接被更多的视为一种回归任务，这一变化已经带来了分子对接速度的显著提升，但是结果的准确性仍有待改善。",{"type":17,"tag":25,"props":66,"children":67},{},[68],{"type":23,"value":69},"本次给大家分享的是来自MIT CSAIL的Regina Barzilay教授和Tommi 
Jaakkola教授课题组的工作，他们将分子对接视为一种生成任务，并采用了时下在图像生成等领域相当热门的生成扩散模型（DGM）。他们的这一模型DiffDock在已知的对接任务中取得了38%的成功率，对比最先进的深度学习模型（20%）和基于搜索的方法（23%）成功率有较大提升，同时速度也有3-12倍提升。对于被折叠的复合物结构，此前各种方法最大仅达到10.4%的对接成功率，而DiffDock仍达到了21.7%的成功率水平。",{"type":17,"tag":25,"props":71,"children":72},{},[73],{"type":23,"value":74},"论文：",{"type":17,"tag":25,"props":76,"children":77},{},[78],{"type":23,"value":79},"DiffDock: Diffusion Steps, Twists, and Turns for Molecular Docking",{"type":17,"tag":25,"props":81,"children":82},{},[83],{"type":23,"value":84},"链接：",{"type":17,"tag":25,"props":86,"children":87},{},[88],{"type":23,"value":89},"arxiv.org/abs/2210.01776",{"type":17,"tag":25,"props":91,"children":92},{},[93],{"type":23,"value":94},"代码：",{"type":17,"tag":25,"props":96,"children":97},{},[98],{"type":23,"value":99},"github.com/gcorso/DiffDock/",{"type":17,"tag":25,"props":101,"children":102},{},[103,105],{"type":23,"value":104},"1、",{"type":17,"tag":31,"props":106,"children":107},{},[108],{"type":23,"value":109},"模型",{"type":17,"tag":25,"props":111,"children":112},{},[113],{"type":23,"value":114},"本文的模型概览如图1所示，该模型以单独的配体和蛋白质作为输入结构。随机采样的初始对接位姿在平移、旋转和扭转自由度上反向扩散去噪。被采样的对接位姿按信心模型，产生一系列预测和信心评分，并按照评分排序，获得最终预测。",{"type":17,"tag":25,"props":116,"children":117},{},[118],{"type":17,"tag":119,"props":120,"children":122},"img",{"alt":7,"src":121},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/01/858961185f4047c9a8bdba8080f49246.png",[],{"type":17,"tag":25,"props":124,"children":125},{},[126],{"type":23,"value":127},"图1. 
DiffDock模型概览",{"type":17,"tag":25,"props":129,"children":130},{},[131,136,137],{"type":17,"tag":31,"props":132,"children":133},{},[134],{"type":23,"value":135},"1.1",{"type":23,"value":37},{"type":17,"tag":31,"props":138,"children":139},{},[140],{"type":23,"value":141},"配体位姿",{"type":17,"tag":25,"props":143,"children":144},{},[145],{"type":23,"value":146},"配体位姿（ligand pose）就是配体在三维空间中所有原子的位置，原则上可以认为位姿x是3n维流形（manifold）空间中的一个点，其中n是原子的数量。",{"type":17,"tag":25,"props":148,"children":149},{},[150],{"type":23,"value":151},"然而，对于分子对接来说，自由度远没有3n这么多，因为键长、键角和配体中的环基本上都是刚性的，使得配体的柔性几乎只体现在可旋转键的扭转角上。传统对接方法以及大多数机器学习方法，将3n维流形空间中单独配体的种子构象c作为输入，并仅改变最终结合构型中的相对位置和扭转自由度。因此符合c构象的位姿空间就是一个(m+6)维的子流形空间（m是可旋转键的数量，6来自于对目标对接蛋白质整体的旋转平移）。",{"type":17,"tag":25,"props":153,"children":154},{},[155],{"type":23,"value":156},"关于流形空间的另一种表述是：“任何与种子构象一致的配体姿态都可以通过平移、旋转和扭转角的变化组合达到。”作者遵循这个范式，将种子构象c作为输入，并将分子对接作为学习以蛋白质结构y为条件的p_c(x | y)条件概率分布的任务。另外，作者定义了从一个流形到另一个“更好的”流形的一对一映射（变换），其中的扩散kernel可以被直接采样。",{"type":17,"tag":25,"props":158,"children":159},{},[160,162,167,168],{"type":23,"value":161},"1.",{"type":17,"tag":31,"props":163,"children":164},{},[165],{"type":23,"value":166},"2",{"type":23,"value":37},{"type":17,"tag":31,"props":169,"children":170},{},[171],{"type":23,"value":172},"扩散模型",{"type":17,"tag":25,"props":174,"children":175},{},[176],{"type":23,"value":177},"对于配体位姿的以上三种转换（平移、旋转、扭转角变化），随机扩散方程可以被统一定义为如下形式：",{"type":17,"tag":25,"props":179,"children":180},{},[181],{"type":17,"tag":119,"props":182,"children":184},{"alt":7,"src":183},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/01/3c1cbb6ea0de45c985d208022226a97c.png",[],{"type":17,"tag":25,"props":186,"children":187},{},[188],{"type":23,"value":189},"其中x为位姿，w为布朗位移，σ^2为噪声的方差。平移情况下的扩散最简单，就是一个标准高斯分布，而旋转和扭转角变化情况下的扩散则略微复杂，具体的实现见Nikolayev & Savyolov, 1970[1]和Leach et al., 
2022[2]。",{"type":17,"tag":25,"props":191,"children":192},{},[193],{"type":23,"value":194},"尽管作者将扩散kernel定义在了(m+6)维的子流形空间上，但扩散kernel训练和推理的过程仍在三维坐标空间中进行。这是因为给打分模型提供完整的分子三维结构（而不是流形空间中抽象的一个点），有助于让模型理解分子间的物理相互作用，并且不会被扭转角定义的人为因素所影响，最终也有利于推广到从未见过的复合体的情形。",{"type":17,"tag":25,"props":196,"children":197},{},[198,199,204,205],{"type":23,"value":161},{"type":17,"tag":31,"props":200,"children":201},{},[202],{"type":23,"value":203},"3",{"type":23,"value":37},{"type":17,"tag":31,"props":206,"children":207},{},[208],{"type":17,"tag":31,"props":209,"children":210},{},[211],{"type":23,"value":212},"信心模型",{"type":17,"tag":25,"props":214,"children":215},{},[216],{"type":23,"value":217},"为获得信心模型d(x, y)的训练数据，作者先运行训练好的扩散模型，为每个训练的样例获得一系列的候选位姿，并生成检测候选位姿的均方根偏差（RMSD）是否小于2Å的T/F标签。然后信心模型就用交叉熵损失来训练，目标是为每个位姿预测一个准确的T/F标签。在推理过程中，扩散模型并行运行并产生N个位姿，并传递给信心模型，最后信心模型会根据获得的RMSD小于2Å的信心值给这些位姿排序。",{"type":17,"tag":25,"props":219,"children":220},{},[221,223],{"type":23,"value":222},"2、",{"type":17,"tag":31,"props":224,"children":225},{},[226],{"type":23,"value":227},"结果",{"type":17,"tag":25,"props":229,"children":230},{},[231],{"type":23,"value":232},"作者使用了PDBBind（一个从PDB库采集的蛋白-配体复合物结构的数据集）上的复合物来评估DiffDock生成10个样本和生成40个样本的效果。所得结果和最佳的基于搜索的分子对接方法SMINA，QuickVina-W，GLIDE，GNINA以及更老的Autodock Vina进行了对比，也和最近的一些基于深度学习的分子对接方法如EquiBind和TANKBind等对比。结果如下：",{"type":17,"tag":25,"props":234,"children":235},{},[236],{"type":17,"tag":119,"props":237,"children":239},{"alt":7,"src":238},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/01/844072a90feb4a39b8630830f5291f00.png",[],{"type":17,"tag":25,"props":241,"children":242},{},[243],{"type":23,"value":244},"图2. 
基于PDBBind数据的对接结果，与其他分子对接方法的对比",{"type":17,"tag":25,"props":246,"children":247},{},[248],{"type":23,"value":249},"同时作者也量化了DiffDock的表现随着生成样本数的变化，以及选择最佳对接结果的准确性。如下图所示：",{"type":17,"tag":25,"props":251,"children":252},{},[253],{"type":17,"tag":119,"props":254,"children":256},{"alt":7,"src":255},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/03/01/641c5a6888ce4f3d948199d3694876d4.png",[],{"type":17,"tag":25,"props":258,"children":259},{},[260],{"type":23,"value":261},"图3. DiffDock的表现随生成样本数的变化(左)&选择最佳对接结果的准确性(右)",{"type":17,"tag":25,"props":263,"children":264},{},[265],{"type":17,"tag":31,"props":266,"children":267},{},[268],{"type":23,"value":269},"3、小结和感想",{"type":17,"tag":25,"props":271,"children":272},{},[273],{"type":23,"value":274},"可以看到本项工作的贡献主要有以下几项：",{"type":17,"tag":25,"props":276,"children":277},{},[278],{"type":23,"value":279},"1、将分子对接任务定义为一个生成问题。",{"type":17,"tag":25,"props":281,"children":282},{},[283],{"type":23,"value":284},"2、根据分子对接的自由度，给配体位姿构造了一个新的扩散过程。",{"type":17,"tag":25,"props":286,"children":287},{},[288],{"type":23,"value":289},"3、在PDBBind上实现了38%的Top1预测，大大超过了以前的最佳搜索算法（23%）和深度学习方法（20%）。",{"type":17,"tag":25,"props":291,"children":292},{},[293],{"type":23,"value":294},"总的来说，本工作中将分子对接视为生成式问题的视角是非常新颖的，但也要看到，尽管DiffDock相比过去的一些方法有了准确度的提升，但仍然无法达到完全替代传统方案的水平。期待这一领域有更多采用AI方法的新思路，进一步推动药物发现工具的发展。",{"type":17,"tag":296,"props":297,"children":299},"h2",{"id":298},"参考文献",[300],{"type":17,"tag":31,"props":301,"children":302},{},[303],{"type":23,"value":298},{"type":17,"tag":25,"props":305,"children":306},{},[307],{"type":23,"value":308},"[1] Dmitry I Nikolayev and Tatjana I Savyolov. Normal distribution on the rotation group so (3). Textures and Microstructures, 29, 1970.",{"type":17,"tag":25,"props":310,"children":311},{},[312],{"type":23,"value":313},"[2] Adam Leach, Sebastian M Schmon, Matteo T Degiacomi, and Chris G Willcocks. Denoising diffusion probabilistic models on so (3) for rotational alignment. 
In ICLR 2022 Workshop on Geometrical and Topological Representation Learning, 2022",{"type":17,"tag":25,"props":315,"children":316},{},[317],{"type":23,"value":318},"往期回顾",{"type":17,"tag":18,"props":320,"children":322},{"id":321},"mindspore-ai科学计算系列-meshgpt显著提升三维几何表示的质量",[323],{"type":17,"tag":57,"props":324,"children":327},{"href":325,"rel":326},"http://mp.weixin.qq.com/s?__biz=MzkxMTM2MjMzNg==&mid=2247615331&idx=1&sn=b5f6a74a14168a26b03f4d3c854bb5dc&chksm=c11e132cf6699a3a583f4b923f5060f0718799d260d67c90c134b14ff9ec3ab387c70d42766a&scene=21#wechat_redirect",[61],[328],{"type":23,"value":329},"MindSpore AI科学计算系列 | MeshGPT显著提升三维几何表示的质量",{"type":17,"tag":18,"props":331,"children":333},{"id":332},"mindspore-ai科学计算系列-metnet3融合稀疏站点数据实现提前24h天气预报",[334],{"type":17,"tag":57,"props":335,"children":338},{"href":336,"rel":337},"http://mp.weixin.qq.com/s?__biz=MzkxMTM2MjMzNg==&mid=2247614540&idx=1&sn=08b091b4e629243defcdf97270234d88&chksm=c11e1003f6699915db3166681b1cb30c321964d71c0b1b362ba62d7ac3f1f05f2586b396be25&scene=21#wechat_redirect",[61],[339],{"type":23,"value":340},"MindSpore AI科学计算系列 | MetNet3融合稀疏站点数据，实现提前24h天气预报",{"type":17,"tag":18,"props":342,"children":344},{"id":343},"mindspore-ai科学计算系列-化学深度学习模型chemgpt的性能评估公式拟合",[345],{"type":17,"tag":57,"props":346,"children":349},{"href":347,"rel":348},"http://mp.weixin.qq.com/s?__biz=MzkxMTM2MjMzNg==&mid=2247614481&idx=1&sn=e45cb91a0dc5f51db7e80761d639a474&chksm=c11e105ef66999488545e230a32ef8c771768c8b488024d5b039bba9ed12974339f795cbe42e&scene=21#wechat_redirect",[61],[350],{"type":23,"value":351},"MindSpore AI科学计算系列 | 
化学深度学习模型ChemGPT的性能评估公式拟合",{"type":17,"tag":18,"props":353,"children":355},{"id":354},"mindspore-ai科学计算系列-gnn-mom基于昇思mindspore-elec的图残差学习电磁求解器",[356],{"type":17,"tag":57,"props":357,"children":360},{"href":358,"rel":359},"http://mp.weixin.qq.com/s?__biz=MzkxMTM2MjMzNg==&mid=2247614352&idx=1&sn=6eddd5488123a7bfbb85681317d6b950&chksm=c11e2fdff669a6c95c412f2e5711d34310f56cd9d468bd7a5c0edecc6cfd6cd55ecefee097a0&scene=21#wechat_redirect",[61],[361],{"type":23,"value":362},"MindSpore AI科学计算系列 | GNN-MoM基于昇思MindSpore Elec的图残差学习电磁求解器",{"title":7,"searchDepth":364,"depth":364,"links":365},4,[366],{"id":298,"depth":367,"text":298},2,"markdown","content:technology-blogs:zh:3001.md","content","technology-blogs/zh/3001.md","technology-blogs/zh/3001","md",1776506125185]