[{"data":1,"prerenderedAt":486},["ShallowReactive",2],{"content-query-NN759Vo3yM":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":480,"_id":481,"_source":482,"_file":483,"_stem":484,"_extension":485},"/news/zh/2484","zh",false,"","项目分享 | 基于昇思MindSpore AI框架的肾脏肿瘤分割一等奖[咸鱼]团队思路","本方案为【基于昇思MindSpore AI框架的肾脏肿瘤分割】赛题的一等奖获奖方案，赛题地址：http://go.datafountain.cn/3056（戳底部“阅读原文”可直达）","2023-05-16","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2023/05/22/0fc15e17e61246a5adda083e1969668e.png","news",{"type":14,"children":15,"toc":477},"root",[16,33,43,48,60,81,89,94,99,104,112,117,125,130,138,143,163,168,176,181,186,194,199,204,212,217,225,230,238,243,252,257,262,270,280,285,290,298,308,313,318,326,336,341,346,354,359,367,377,382,387,395,405,410,415,423,431,439,444,452,457,462,467,472],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"项目分享-基于昇思mindspore-ai框架的肾脏肿瘤分割一等奖咸鱼团队思路",[22,25,31],{"type":23,"value":24},"text","项目分享 | 基于昇思MindSpore AI框架的肾脏肿瘤分割一等奖",{"type":17,"tag":26,"props":27,"children":28},"span",{},[29],{"type":23,"value":30},"咸鱼",{"type":23,"value":32},"团队思路",{"type":17,"tag":34,"props":35,"children":36},"p",{},[37],{"type":17,"tag":38,"props":39,"children":40},"strong",{},[41],{"type":23,"value":42},"2022 CCF BDCI",{"type":17,"tag":34,"props":44,"children":45},{},[46],{"type":23,"value":47},"第十届CCF大数据与计算智能大赛",{"type":17,"tag":34,"props":49,"children":50},{},[51,53,58],{"type":23,"value":52},"第十届CCF大数据与计算智能大赛（2022 CCF BDCI）已圆满结束，",{"type":17,"tag":38,"props":54,"children":55},{},[56],{"type":23,"value":57},"大赛官方竞赛平台DataFountain",{"type":23,"value":59},"（简称DF平台）将陆续释出各赛题获奖队伍的方案思路，欢迎广大数据科学家交流讨论。",{"type":17,"tag":34,"props":61,"children":62},{},[63,65,70,72],{"type":23,"value":64},"本方案为**【基于昇思MindSpore 
AI框架的肾脏肿瘤分割】",{"type":17,"tag":38,"props":66,"children":67},{},[68],{"type":23,"value":69},"赛题的",{"type":23,"value":71},"一等奖获奖方案，赛题地址：",{"type":17,"tag":73,"props":74,"children":78},"a",{"href":75,"rel":76},"http://go.datafountain.cn/3056",[77],"nofollow",[79],{"type":23,"value":80},"http://go.datafountain.cn/3056（戳底部“阅读原文”可直达）",{"type":17,"tag":34,"props":82,"children":83},{},[84],{"type":17,"tag":38,"props":85,"children":86},{},[87],{"type":23,"value":88},"获奖团队简介",{"type":17,"tag":34,"props":90,"children":91},{},[92],{"type":23,"value":93},"**团队名称：**咸鱼",{"type":17,"tag":34,"props":95,"children":96},{},[97],{"type":23,"value":98},"**团队成员：**本团队由两名中国科学院计算技术研究所的硕士一年级学生组成。在选修研究生一年级的人工智能课时，受老师推荐参加此比赛。",{"type":17,"tag":34,"props":100,"children":101},{},[102],{"type":23,"value":103},"**所获奖项：**一等奖",{"type":17,"tag":34,"props":105,"children":106},{},[107],{"type":17,"tag":38,"props":108,"children":109},{},[110],{"type":23,"value":111},"摘要",{"type":17,"tag":34,"props":113,"children":114},{},[115],{"type":23,"value":116},"这个比赛是一个医学图像分割比赛，我们在比赛中采用了数据增强等方法，并创新性地提出了Res-U-Net模型，最后在训练时加入Lovasz-Softmax损失函数，使得模型在测试集上取得较好的训练效果。",{"type":17,"tag":34,"props":118,"children":119},{},[120],{"type":17,"tag":38,"props":121,"children":122},{},[123],{"type":23,"value":124},"关键词",{"type":17,"tag":34,"props":126,"children":127},{},[128],{"type":23,"value":129},"语义分割，数据增强，ResNet，Lovasz-Softmax",{"type":17,"tag":34,"props":131,"children":132},{},[133],{"type":17,"tag":38,"props":134,"children":135},{},[136],{"type":23,"value":137},"1、引言",{"type":17,"tag":34,"props":139,"children":140},{},[141],{"type":23,"value":142},"这个比赛的任务是深度学习中的语义分割任务。这是一个非常经典的深度学习任务，多年以来已经被研究得十分透彻。我们在参考前人的方法的同时，也创新性地提出了自己的方法，这些方法包括：",{"type":17,"tag":144,"props":145,"children":146},"ol",{},[147,153,158],{"type":17,"tag":148,"props":149,"children":150},"li",{},[151],{"type":23,"value":152},"数据增强
，我们采用了多种数据增强手段，并结合此次比赛的数据特点进行数据增强，提高了模型的泛化能力。",{"type":17,"tag":148,"props":154,"children":155},{},[156],{"type":23,"value":157},"创新性地提出Res-U-Net模型，结合了U-Net的结构并充分利用了ResNet Block的优点，使得模型的训练和推理较为稳定。",{"type":17,"tag":148,"props":159,"children":160},{},[161],{"type":23,"value":162},"采用了多种损失函数相结合的方法，使用带权重的交叉熵损失，并配合Lovasz-Softmax损失进行训练。",{"type":17,"tag":34,"props":164,"children":165},{},[166],{"type":23,"value":167},"最终我们的方法在验证集和测试集上都取得较好的效果。",{"type":17,"tag":34,"props":169,"children":170},{},[171],{"type":17,"tag":38,"props":172,"children":173},{},[174],{"type":23,"value":175},"2、数据集",{"type":17,"tag":34,"props":177,"children":178},{},[179],{"type":23,"value":180},"这个比赛采用的训练集是kits19数据集[5]，测试集是比赛主办方的私有数据。这些数据集都是CT扫描的肾脏肿瘤数据。训练集一共有210个肾脏肿瘤病人的扫描结果，每一个病人的扫描结果包含约300张扫描图片。训练集一共被标注了3类，分别是背景类、肾脏、肾脏肿瘤。",{"type":17,"tag":34,"props":182,"children":183},{},[184],{"type":23,"value":185},"我们在训练集的210个病人数据中随机选取30个病人数据作为验证集，剩下的180个病人数据作为训练集。",{"type":17,"tag":34,"props":187,"children":188},{},[189],{"type":17,"tag":38,"props":190,"children":191},{},[192],{"type":23,"value":193},"3、数据处理",{"type":17,"tag":34,"props":195,"children":196},{},[197],{"type":23,"value":198},"注意到CT肾脏扫描的图像在时空上是连续的，即，每一张图像与前后图像都有紧密的时空联系。我们在训练时选取一张训练集图像的同时，也会选取这张训练集图像的前后各一张图像，并把它们拼接在一起。即，训练时采用的图像大小为512×512×3。",{"type":17,"tag":34,"props":200,"children":201},{},[202],{"type":23,"value":203},"为提高模型的泛化能力，降低训练模型对于某些样本和属性的依赖，我们在训练数据读取时通过图像增强的方式产生更丰富的训练样本。我们通过图像的几何变换操作进行图像增强，变换方法包括随机旋转、随机翻转和随机裁剪，训练样本在加入到训练集前将依次通过以上方法进行增强。",{"type":17,"tag":34,"props":205,"children":206},{},[207],{"type":17,"tag":38,"props":208,"children":209},{},[210],{"type":23,"value":211},"3.1 随机旋转和随机翻转",{"type":17,"tag":34,"props":213,"children":214},{},[215],{"type":23,"value":216},"图像的随机旋转和随机翻转是最常用的图像增强方法。我们对样本进行小幅度（-9°，9°）的均匀随机旋转，并依0.5的概率进行随机水平翻转。",{"type":17,"tag":34,"props":218,"children":219},{},[220],{"type":17,"tag":38,"props":221,"children":222},{},[223],{"type":23,"value":224},"3.2 
随机裁剪",{"type":17,"tag":34,"props":226,"children":227},{},[228],{"type":23,"value":229},"我们先对样本图像的4个边进行12像素的填充，即，把图像由512×512填充为536×536。然后对图像进行随机面积和随机长宽比裁剪。图像的随机面积裁剪比例的区间为(0.92,0.99)，即，Scrop/Spad∈(0.92,0.99)，其中Scrop表示裁剪图像的面积，Spad表示填充图像的面积。样本的随机长宽比范围为(0.96,1.04)，即，H/W∈(0.96,1.04)，其中H为图像的高，W为图像的宽。最后将裁剪后的图像缩放到512×512。",{"type":17,"tag":34,"props":231,"children":232},{},[233],{"type":17,"tag":38,"props":234,"children":235},{},[236],{"type":23,"value":237},"4、模型",{"type":17,"tag":34,"props":239,"children":240},{},[241],{"type":23,"value":242},"我们提出的模型Res-U-Net如图4.1所示，这个模型的主要结构参考自Cortinhal 等人的工作[1]，其主干为经典的encoder-decoder结构。Res-U-Net的输入是一个512×512×3的图像，其中3是图像的通道数，512是图像的宽和高。输入图像经过的第一个层为Conv0卷积层，Conv0卷积层会初步学习图像的基础特征，并把图像的通道数扩展到32。紧随其后的是5层encoder，encoder在图4.1中表示为ResDownSample层。ResDownSample会对特征图进行下采样，下采样后特征图的宽度和高度都缩减为原来的1/2。与此同时，ResDownSample会学习特征图的特征并把其通道数扩展为原来的2倍。在encoder之后，对应的，一共有5层decoder，decoder在图4.1中被表示为UpSample和CatResBlock。UpSample层会对特征图进行上采样，上采样后特征图的宽度和高度会扩展为原来的2倍，通道数会缩减为原来的1/4。上采样后的特征图会与前面对应的ResDownSample层的输出进行拼接，这里的拼接参考了Ronneberger等人的工作[2]。在拼接前ResDownSample层的输出特征图会先通过一个1×1的Conv层进行通道数的缩减，这个通道数的缩减是为了确保后面的CatResBlock的输入特征图具有合理的通道数。CatResBlock会把输入的两个特征图进行拼接并学习其特征。",{"type":17,"tag":34,"props":244,"children":245},{},[246],{"type":17,"tag":247,"props":248,"children":251},"img",{"alt":249,"src":250},"image.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230522033908.27523162343905452752724342914719:20230522063422:2400:96397CF2C7E88E7B5A9881F68E4A60DB4BEECC673C546F3BBDAA5AA6FE68E52B.png",[],{"type":17,"tag":34,"props":253,"children":254},{},[255],{"type":23,"value":256},"图4.1：Res-U-Net模型结构图",{"type":17,"tag":34,"props":258,"children":259},{},[260],{"type":23,"value":261},"在经过5次UpSample后输出特征图的大小为512×512×16，这个特征图会与Conv0层的输出进行拼接，最后再经过一次卷积并输出512×512×3的特征图。",{"type":17,"tag":34,"props":263,"children":264},{},[265],{"type":17,"tag":38,"props":266,"children":267},{},[268],{"type":23,"value":269},"4.1 
Conv0层",{"type":17,"tag":34,"props":271,"children":272},{},[273],{"type":17,"tag":38,"props":274,"children":275},{},[276],{"type":17,"tag":247,"props":277,"children":279},{"alt":249,"src":278},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230522033925.49471531511135095833433232113845:20230522063422:2400:BD9E2C072C11F2B07E03C1EB61F7B1E781BC8E9CC37C051EC8FF47F20027E7E3.png",[],{"type":17,"tag":34,"props":281,"children":282},{},[283],{"type":23,"value":284},"图4.2：Conv0层的结构",{"type":17,"tag":34,"props":286,"children":287},{},[288],{"type":23,"value":289},"Conv0层的结构如图4.2所示，其由一个3×3 Conv，一个Batch Norm和一个Leaky ReLU层组成。3×3 Conv的stride = 1，在图中表示为s = 1，其卷积操作不改变特征图的宽和高。Conv0层的作用是初步提取图像的特征信息，为后面的特征提取打基础。",{"type":17,"tag":34,"props":291,"children":292},{},[293],{"type":17,"tag":38,"props":294,"children":295},{},[296],{"type":23,"value":297},"4.2 ResDownSample层",{"type":17,"tag":34,"props":299,"children":300},{},[301],{"type":17,"tag":38,"props":302,"children":303},{},[304],{"type":17,"tag":247,"props":305,"children":307},{"alt":249,"src":306},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230522033948.12504391078192072314962988495372:20230522063422:2400:2E6AEAAD7338BE8C5ED8271E5DA18056F2EAFB3861BFD172704647C8937FBBF5.png",[],{"type":17,"tag":34,"props":309,"children":310},{},[311],{"type":23,"value":312},"图4.3：ResDownSample层的结构",{"type":17,"tag":34,"props":314,"children":315},{},[316],{"type":23,"value":317},"ResDownSample层的结构如图4.3所示，其结构参考了He等人的工作[3]。第1个卷积层是一个3×3 Conv，其stride = 2，目的是为了下采样，使图像的宽和高缩减为原来的1/2。其后接着的是Batch Norm、Leaky ReLU层。第2个卷积层的stride = 1，其不改变特征图的宽和高。输入特征图还会经过另一个stride = 2的卷积层，这个卷积层在He等人的工作[3]里被称为shortcut。经过shortcut后的特征图与经过第2个卷积层的特征图相加，随后再经过Leaky ReLU输出。",{"type":17,"tag":34,"props":319,"children":320},{},[321],{"type":17,"tag":38,"props":322,"children":323},{},[324],{"type":23,"value":325},"4.3 
Connector层",{"type":17,"tag":34,"props":327,"children":328},{},[329],{"type":17,"tag":38,"props":330,"children":331},{},[332],{"type":17,"tag":247,"props":333,"children":335},{"alt":249,"src":334},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230522034010.32451683764323903605996043721529:20230522063422:2400:9A71AE67373C5580FF4B76295485EDEDBF423A629FC4EFCE92D4A852740FF854.png",[],{"type":17,"tag":34,"props":337,"children":338},{},[339],{"type":23,"value":340},"图4.4：Connector层的结构",{"type":17,"tag":34,"props":342,"children":343},{},[344],{"type":23,"value":345},"Connector层的结构如图4.4所示，其结构和Conv0层基本一致，只不过其卷积层的stride = 2，目的是为了对特征图进行下采样。",{"type":17,"tag":34,"props":347,"children":348},{},[349],{"type":17,"tag":38,"props":350,"children":351},{},[352],{"type":23,"value":353},"4.4 UpSample层",{"type":17,"tag":34,"props":355,"children":356},{},[357],{"type":23,"value":358},"UpSample层采用的是PixelShuffle方法。PixelShuffle方法没有任何可学习的参数，它通过调换特征图的像素点顺序来进行上采样。例如：一个H x W x C的特征图经过PixelShuffle后会变成2H x 2W x C/4，其宽和高扩展为2倍，通道数缩减为原来的1/4。",{"type":17,"tag":34,"props":360,"children":361},{},[362],{"type":17,"tag":38,"props":363,"children":364},{},[365],{"type":23,"value":366},"4.5 CatResBlock层",{"type":17,"tag":34,"props":368,"children":369},{},[370],{"type":17,"tag":38,"props":371,"children":372},{},[373],{"type":17,"tag":247,"props":374,"children":376},{"alt":249,"src":375},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230522034029.56466902355292332588855133880455:20230522063422:2400:E0C3DAC591645E6D71A6F6D3E571AE8EDBFB665BA5CEA51455F186E8641E07D1.png",[],{"type":17,"tag":34,"props":378,"children":379},{},[380],{"type":23,"value":381},"图4.5：CatResBlock层的结构",{"type":17,"tag":34,"props":383,"children":384},{},[385],{"type":23,"value":386},"CatResBlock层的结构如图4.5所示。其第一层为拼接层，目的是为了拼接前面对应的ResDownSample层的输出。后面几层依次为3×3 Conv、Batch Norm、Leaky ReLU、3×3 Conv和Batch 
Norm，输出的特征图与拼接的特征图相加后再通过Leaky ReLU层输出。",{"type":17,"tag":34,"props":388,"children":389},{},[390],{"type":17,"tag":38,"props":391,"children":392},{},[393],{"type":23,"value":394},"4.6 CatConv层",{"type":17,"tag":34,"props":396,"children":397},{},[398],{"type":17,"tag":38,"props":399,"children":400},{},[401],{"type":17,"tag":247,"props":402,"children":404},{"alt":249,"src":403},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230522034049.08826267070284458980154809389046:20230522063422:2400:217C5E60F8D42AC475B32EBB80DDC3EE7BD566CACA0DEE27BA030573B784994C.png",[],{"type":17,"tag":34,"props":406,"children":407},{},[408],{"type":23,"value":409},"图4.6：CatConv层的结构",{"type":17,"tag":34,"props":411,"children":412},{},[413],{"type":23,"value":414},"CatConv层的结构如图4.6所示。其第一层为拼接层，后面几层依次为3×3 Conv、Batch Norm和Leaky ReLU层 。",{"type":17,"tag":34,"props":416,"children":417},{},[418],{"type":17,"tag":247,"props":419,"children":422},{"alt":420,"src":421},"cke_14653.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230522034138.53878928181020432691740307733091:20230522063422:2400:FD1D98118C95EBA504FFC4A91C00D6CF6D3E5E153542DC8E4B4E53DE82E62A8A.png",[],{"type":17,"tag":34,"props":424,"children":425},{},[426],{"type":17,"tag":247,"props":427,"children":430},{"alt":428,"src":429},"cke_16776.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230522034203.54199917224403191503466622941924:20230522063422:2400:81824E2D3AB438BE9BC8C48E808625A5A7A294BEC8AF78AED1B325CD1E5D60CC.png",[],{"type":17,"tag":34,"props":432,"children":433},{},[434],{"type":17,"tag":38,"props":435,"children":436},{},[437],{"type":23,"value":438},"致谢",{"type":17,"tag":34,"props":440,"children":441},{},[442],{"type":23,"value":443},"感谢人工智能课的老师，推荐我们参加这个比赛，让我们有机会走上这个比赛的舞台。",{"type":17,"tag":34,"props":445,"childre
n":446},{},[447],{"type":17,"tag":38,"props":448,"children":449},{},[450],{"type":23,"value":451},"参考",{"type":17,"tag":34,"props":453,"children":454},{},[455],{"type":23,"value":456},"[1] Cortinhal, Tiago, George Tzelepis, and Eren Erdal Aksoy. \"SalsaNext: Fast, uncertainty-aware semantic segmentation of LiDAR point clouds.\" International Symposium on Visual Computing. Springer, Cham, 2020.",{"type":17,"tag":34,"props":458,"children":459},{},[460],{"type":23,"value":461},"[2] Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. \"U-net: Convolutional networks for biomedical image segmentation.\" International Conference on Medical image computing and computer-assisted intervention. Springer, Cham, 2015.",{"type":17,"tag":34,"props":463,"children":464},{},[465],{"type":23,"value":466},"[3] He, Kaiming, et al. \"Deep residual learning for image recognition.\" Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.",{"type":17,"tag":34,"props":468,"children":469},{},[470],{"type":23,"value":471},"[4] Berman, Maxim, Amal Rannen Triki, and Matthew B. Blaschko. \"The lovász-softmax loss: A tractable surrogate for the optimization of the intersection-over-union measure in neural networks.\" Proceedings of the IEEE conference on computer vision and pattern recognition. 2018.",{"type":17,"tag":34,"props":473,"children":474},{},[475],{"type":23,"value":476},"[5] Heller, Nicholas, et al. \"The state of the art in kidney and kidney tumor segmentation in contrast-enhanced CT imaging: Results of the KiTS19 challenge.\" Medical image analysis 67 (2021): 101821.",{"title":7,"searchDepth":478,"depth":478,"links":479},4,[],"markdown","content:news:zh:2484.md","content","news/zh/2484.md","news/zh/2484","md",1776506065663]