[{"data":1,"prerenderedAt":1238},["ShallowReactive",2],{"content-query-2jwxfP2mZx":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":1232,"_id":1233,"_source":1234,"_file":1235,"_stem":1236,"_extension":1237},"/technology-blogs/zh/1693","zh",false,"","【MindSpore易点通】MindSpore Data经验解析","MindSpore Data提供了简洁、丰富的数据读取、处理、增强等功能","2022-08-12","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2022/08/15/aaa08bd022724953bae225d052dc451e.png","technology-blogs","实践",{"type":15,"children":16,"toc":1199},"root",[17,25,36,42,62,71,86,91,110,136,141,146,151,156,161,171,176,181,205,210,215,220,225,230,235,240,244,249,254,278,283,288,292,297,302,307,312,317,322,327,332,337,342,347,352,357,362,367,372,377,382,387,392,397,412,417,428,439,450,455,460,465,470,475,480,485,508,513,518,523,528,533,538,542,547,552,557,562,567,572,582,587,592,615,620,625,630,634,639,644,649,654,659,664,669,673,678,694,699,704,709,714,719,724,729,733,738,743,748,753,758,771,786,791,802,813,818,823,828,833,838,843,848,853,858,863,868,872,876,880,885,890,895,900,905,915,925,935,940,945,950,974,979,983,987,991,996,1000,1004,1009,1014,1019,1024,1043,1048,1053,1058,1063,1068,1091,1096,1100,1105,1110,1133,1138,1142,1147,1170,1174,1179,1184,1189,1194],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"mindspore易点通mindspore-data经验解析",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":29},"h2",{"id":28},"一简介",[30],{"type":18,"tag":31,"props":32,"children":33},"strong",{},[34],{"type":24,"value":35},"一、简介",{"type":18,"tag":37,"props":38,"children":39},"p",{},[40],{"type":24,"value":41},"首先MindSpore Data提供了简洁、丰富的数据读取、处理、增强等功能；同时使用读取数据的流程，主要分为三步（使用和PyTorch中数据读取方式类似）：",{"type":18,"tag":43,"props":44,"children":45},"ol",{},[46,52,57],{"type":18,"tag":47,"props":48,"children":49},"li",{},[50],{"type":24,"value":51},"数据集加载 - 
根据数据格式，选择最简单、高效的数据集加载方式；",{"type":18,"tag":47,"props":53,"children":54},{},[55],{"type":24,"value":56},"数据增强 - 使用几何变换、颜色变换、旋转、平移、缩放等基本图像处理技术来扩充数据集；",{"type":18,"tag":47,"props":58,"children":59},{},[60],{"type":24,"value":61},"数据处理 - 对数据集做repeat、batch、shuffle、map、zip等操作。",{"type":18,"tag":26,"props":63,"children":65},{"id":64},"二使用说明",[66],{"type":18,"tag":31,"props":67,"children":68},{},[69],{"type":24,"value":70},"二、使用说明",{"type":18,"tag":26,"props":72,"children":74},{"id":73},"_1数据集加载",[75],{"type":18,"tag":31,"props":76,"children":77},{},[78],{"type":18,"tag":31,"props":79,"children":80},{},[81],{"type":18,"tag":31,"props":82,"children":83},{},[84],{"type":24,"value":85},"1、数据集加载",{"type":18,"tag":37,"props":87,"children":88},{},[89],{"type":24,"value":90},"首先加载要使用的数据集，根据实际使用的数据集格式，从以下三种数据集读取方式选取一种即可：",{"type":18,"tag":92,"props":93,"children":94},"ul",{},[95,100,105],{"type":18,"tag":47,"props":96,"children":97},{},[98],{"type":24,"value":99},"常用标准数据集：例如 ImageNet、MNIST、CIFAR-10、VOC等；",{"type":18,"tag":47,"props":101,"children":102},{},[103],{"type":24,"value":104},"特定格式数据集 ：特定存储格式的数据，例如：MindRecord；",{"type":18,"tag":47,"props":106,"children":107},{},[108],{"type":24,"value":109},"自定义数据集：数据组织形式自定义的数据集。",{"type":18,"tag":111,"props":112,"children":114},"h3",{"id":113},"_21-常用数据集加载",[115],{"type":18,"tag":31,"props":116,"children":117},{},[118,126,128],{"type":18,"tag":31,"props":119,"children":120},{},[121],{"type":18,"tag":31,"props":122,"children":123},{},[124],{"type":24,"value":125},"2.1",{"type":24,"value":127}," ",{"type":18,"tag":31,"props":129,"children":130},{},[131],{"type":18,"tag":31,"props":132,"children":133},{},[134],{"type":24,"value":135},"常用数据集加载",{"type":18,"tag":37,"props":137,"children":138},{},[139],{"type":24,"value":140},"目前已经支持的常用数据集有：MNIST, CIFAR-10, CIFAR-100, VOC, ImageNet, CelebA。如果使用以上开源数据集或者已经将所使用的数据整理为以上标准数据集格式，可以直接使用如下方法加载数据集。以CIFAR-10为例：",{"type":18,"tag":37,"props":142,"children":143},{},[144],{"type":24,"value":145},"import 
mindspore.dataset as ds",{"type":18,"tag":37,"props":147,"children":148},{},[149],{"type":24,"value":150},"DATA_DIR = \"./cifar-10-batches-bin/\"",{"type":18,"tag":37,"props":152,"children":153},{},[154],{"type":24,"value":155},"cifar_ds = ds.Cifar10Dataset(DATA_DIR)",{"type":18,"tag":37,"props":157,"children":158},{},[159],{"type":24,"value":160},"数据集加载好之后，就可以调用接口create_dict_iterator()创建迭代器读取数据，后面两种方式同理。",{"type":18,"tag":37,"props":162,"children":163},{},[164,166],{"type":24,"value":165},"for data in cifar_ds.create_dict_iterator():",{"type":18,"tag":31,"props":167,"children":168},{},[169],{"type":24,"value":170},"# In CIFAR-10 dataset, each dictionary of data has keys \"image\" and \"label\".",{"type":18,"tag":37,"props":172,"children":173},{},[174],{"type":24,"value":175},"print(data[\"image\"])",{"type":18,"tag":37,"props":177,"children":178},{},[179],{"type":24,"value":180},"print(data[\"label\"])",{"type":18,"tag":111,"props":182,"children":184},{"id":183},"_22-特定格式数据集加载",[185],{"type":18,"tag":31,"props":186,"children":187},{},[188,196,197],{"type":18,"tag":31,"props":189,"children":190},{},[191],{"type":18,"tag":31,"props":192,"children":193},{},[194],{"type":24,"value":195},"2.2",{"type":24,"value":127},{"type":18,"tag":31,"props":198,"children":199},{},[200],{"type":18,"tag":31,"props":201,"children":202},{},[203],{"type":24,"value":204},"特定格式数据集加载",{"type":18,"tag":37,"props":206,"children":207},{},[208],{"type":24,"value":209},"目前支持的特定格式数据集为：MindRecord。MindRecord格式的数据读取性能更优，推荐用户将数据转换为MindRecord格式。转换示例如下：",{"type":18,"tag":37,"props":211,"children":212},{},[213],{"type":24,"value":214},"from mindspore.mindrecord import Cifar10ToMR",{"type":18,"tag":37,"props":216,"children":217},{},[218],{"type":24,"value":219},"cifar10_path = \"./cifar-10-batches-py\"",{"type":18,"tag":37,"props":221,"children":222},{},[223],{"type":24,"value":224},"mindrecord_path = 
\"./cifar10.mindrecord\"",{"type":18,"tag":37,"props":226,"children":227},{},[228],{"type":24,"value":229},"cifar10_transformer = Cifar10ToMR(cifar10_path, mindrecord_path)",{"type":18,"tag":37,"props":231,"children":232},{},[233],{"type":24,"value":234},"cifar10_transformer.transform([\"label\"])",{"type":18,"tag":37,"props":236,"children":237},{},[238],{"type":24,"value":239},"MindRecord数据加载：",{"type":18,"tag":37,"props":241,"children":242},{},[243],{"type":24,"value":145},{"type":18,"tag":37,"props":245,"children":246},{},[247],{"type":24,"value":248},"CV_FILE_NAME = \"./cifar10.mindrecord\"",{"type":18,"tag":37,"props":250,"children":251},{},[252],{"type":24,"value":253},"cifar_ds = ds.MindDataset(dataset_file=CV_FILE_NAME, columns_list=[\"data\",\"label\"], shuffle=True)",{"type":18,"tag":111,"props":255,"children":257},{"id":256},"_23-自定义数据集加载",[258],{"type":18,"tag":31,"props":259,"children":260},{},[261,269,270],{"type":18,"tag":31,"props":262,"children":263},{},[264],{"type":18,"tag":31,"props":265,"children":266},{},[267],{"type":24,"value":268},"2.3",{"type":24,"value":127},{"type":18,"tag":31,"props":271,"children":272},{},[273],{"type":18,"tag":31,"props":274,"children":275},{},[276],{"type":24,"value":277},"自定义数据集加载",{"type":18,"tag":37,"props":279,"children":280},{},[281],{"type":24,"value":282},"提供的自定义数据集加载方式为：GeneratorDataset接口。GeneratorDataset接口需要自己实现一个生成器，生成训练数据和标签，适用于较复杂的任务。",{"type":18,"tag":37,"props":284,"children":285},{},[286],{"type":24,"value":287},"GeneratorDataset()需要传入一个生成器，生成训练数据。",{"type":18,"tag":37,"props":289,"children":290},{},[291],{"type":24,"value":145},{"type":18,"tag":37,"props":293,"children":294},{},[295],{"type":24,"value":296},"class Dataset:",{"type":18,"tag":37,"props":298,"children":299},{},[300],{"type":24,"value":301},"def __init__(self, image_list, label_list):",{"type":18,"tag":37,"props":303,"children":304},{},[305],{"type":24,"value":306},"super(Dataset, 
self).__init__()",{"type":18,"tag":37,"props":308,"children":309},{},[310],{"type":24,"value":311},"self.imgs = image_list",{"type":18,"tag":37,"props":313,"children":314},{},[315],{"type":24,"value":316},"self.labels = label_list",{"type":18,"tag":37,"props":318,"children":319},{},[320],{"type":24,"value":321},"def __getitem__(self, index):",{"type":18,"tag":37,"props":323,"children":324},{},[325],{"type":24,"value":326},"img = Image.open(self.imgs[index]).convert('RGB')",{"type":18,"tag":37,"props":328,"children":329},{},[330],{"type":24,"value":331},"return img, self.labels[index]",{"type":18,"tag":37,"props":333,"children":334},{},[335],{"type":24,"value":336},"def __len__(self):",{"type":18,"tag":37,"props":338,"children":339},{},[340],{"type":24,"value":341},"return len(self.imgs)",{"type":18,"tag":37,"props":343,"children":344},{},[345],{"type":24,"value":346},"class MySampler():",{"type":18,"tag":37,"props":348,"children":349},{},[350],{"type":24,"value":351},"def __init__(self, dataset):",{"type":18,"tag":37,"props":353,"children":354},{},[355],{"type":24,"value":356},"self.__num_data = len(dataset)",{"type":18,"tag":37,"props":358,"children":359},{},[360],{"type":24,"value":361},"def __iter__(self):",{"type":18,"tag":37,"props":363,"children":364},{},[365],{"type":24,"value":366},"indices = list(range(self.__num_data))",{"type":18,"tag":37,"props":368,"children":369},{},[370],{"type":24,"value":371},"return iter(indices)",{"type":18,"tag":37,"props":373,"children":374},{},[375],{"type":24,"value":376},"dataset = Dataset(save_image_list, save_label_list)",{"type":18,"tag":37,"props":378,"children":379},{},[380],{"type":24,"value":381},"sampler = MySampler(dataset)",{"type":18,"tag":37,"props":383,"children":384},{},[385],{"type":24,"value":386},"cifar_ds = ds.GeneratorDataset(dataset,",{"type":18,"tag":37,"props":388,"children":389},{},[390],{"type":24,"value":391},"column_names=[\"image\", \"label\"], sampler=sampler, 
shuffle=True)",{"type":18,"tag":37,"props":393,"children":394},{},[395],{"type":24,"value":396},"以上例子中 dataset是一个生成器，产生image和label。",{"type":18,"tag":26,"props":398,"children":400},{"id":399},"_2数据增强",[401],{"type":18,"tag":31,"props":402,"children":403},{},[404],{"type":18,"tag":31,"props":405,"children":406},{},[407],{"type":18,"tag":31,"props":408,"children":409},{},[410],{"type":24,"value":411},"2、数据增强",{"type":18,"tag":37,"props":413,"children":414},{},[415],{"type":24,"value":416},"提供 c_transforms 和 py_transforms 两个模块来供用户完成数据增强操作，两者的对比如下:",{"type":18,"tag":37,"props":418,"children":419},{},[420],{"type":18,"tag":31,"props":421,"children":422},{},[423],{"type":18,"tag":31,"props":424,"children":425},{},[426],{"type":24,"value":427},"模块名称",{"type":18,"tag":37,"props":429,"children":430},{},[431],{"type":18,"tag":31,"props":432,"children":433},{},[434],{"type":18,"tag":31,"props":435,"children":436},{},[437],{"type":24,"value":438},"实现",{"type":18,"tag":37,"props":440,"children":441},{},[442],{"type":18,"tag":31,"props":443,"children":444},{},[445],{"type":18,"tag":31,"props":446,"children":447},{},[448],{"type":24,"value":449},"优缺点",{"type":18,"tag":37,"props":451,"children":452},{},[453],{"type":24,"value":454},"c_transforms",{"type":18,"tag":37,"props":456,"children":457},{},[458],{"type":24,"value":459},"基于C++的OpenCV实现",{"type":18,"tag":37,"props":461,"children":462},{},[463],{"type":24,"value":464},"性能较高",{"type":18,"tag":37,"props":466,"children":467},{},[468],{"type":24,"value":469},"py_transforms",{"type":18,"tag":37,"props":471,"children":472},{},[473],{"type":24,"value":474},"基于Python的PIL实现",{"type":18,"tag":37,"props":476,"children":477},{},[478],{"type":24,"value":479},"性能较差，但是可以自定义增强函数",{"type":18,"tag":37,"props":481,"children":482},{},[483],{"type":24,"value":484},"使用建议：如果不需要自定义增强函数，并且c_transforms中有对应的实现，建议使用c_transforms模块。",{"type":18,"tag":111,"props":486,"children":488},{"id":487},"_21-c_transforms模块",[489],{"type":18,"tag":31,"props":490,"child
ren":491},{},[492,499,500],{"type":18,"tag":31,"props":493,"children":494},{},[495],{"type":18,"tag":31,"props":496,"children":497},{},[498],{"type":24,"value":125},{"type":24,"value":127},{"type":18,"tag":31,"props":501,"children":502},{},[503],{"type":18,"tag":31,"props":504,"children":505},{},[506],{"type":24,"value":507},"c_transforms模块",{"type":18,"tag":37,"props":509,"children":510},{},[511],{"type":24,"value":512},"目前c_transforms接口包括两部分：mindspore.dataset.transforms.c_transforms和mindspore.dataset.vision.c_transforms。",{"type":18,"tag":37,"props":514,"children":515},{},[516],{"type":24,"value":517},"使用方法：",{"type":18,"tag":37,"props":519,"children":520},{},[521],{"type":24,"value":522},"1.定义好数据增强函数：把多个增强函数加入到一个list中，并调用Compose封装；",{"type":18,"tag":37,"props":524,"children":525},{},[526],{"type":24,"value":527},"2.调用dataset.map()函数，将定义好的函数或算子作用于指定的数据列。",{"type":18,"tag":37,"props":529,"children":530},{},[531],{"type":24,"value":532},"示例代码如下：",{"type":18,"tag":37,"props":534,"children":535},{},[536],{"type":24,"value":537},"import mindspore.dataset as ds\nimport mindspore.dataset.vision.c_transforms as CV_transforms\nimport mindspore.dataset.transforms.c_transforms as C_transforms",{"type":18,"tag":37,"props":539,"children":540},{},[541],{"type":24,"value":150},{"type":18,"tag":37,"props":543,"children":544},{},[545],{"type":24,"value":546},"cifar_ds = ds.Cifar10Dataset(DATA_DIR, shuffle=True, usage='train')\n# 定义增强函数列表",{"type":18,"tag":37,"props":548,"children":549},{},[550],{"type":24,"value":551},"transforms_list = C_transforms.Compose([",{"type":18,"tag":37,"props":553,"children":554},{},[555],{"type":24,"value":556},"CV_transforms.RandomCrop((32, 32), (4, 4, 4, 4)),",{"type":18,"tag":37,"props":558,"children":559},{},[560],{"type":24,"value":561},"CV_transforms.RandomHorizontalFlip(),",{"type":18,"tag":37,"props":563,"children":564},{},[565],{"type":24,"value":566},"CV_transforms.Rescale(rescale, 
shift),",{"type":18,"tag":37,"props":568,"children":569},{},[570],{"type":24,"value":571},"CV_transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),",{"type":18,"tag":37,"props":573,"children":574},{},[575,577],{"type":24,"value":576},"CV_transforms.HWC2CHW()])",{"type":18,"tag":31,"props":578,"children":579},{},[580],{"type":24,"value":581},"#调用map()函数",{"type":18,"tag":37,"props":583,"children":584},{},[585],{"type":24,"value":586},"cifar_ds = cifar_ds.map(operations=transforms_list, input_columns=\"image\")",{"type":18,"tag":37,"props":588,"children":589},{},[590],{"type":24,"value":591},"其中，input_columns为指定要做增强的数据列，operations为定义的增强函数。",{"type":18,"tag":111,"props":593,"children":595},{"id":594},"_22-py_transforms模块",[596],{"type":18,"tag":31,"props":597,"children":598},{},[599,606,607],{"type":18,"tag":31,"props":600,"children":601},{},[602],{"type":18,"tag":31,"props":603,"children":604},{},[605],{"type":24,"value":195},{"type":24,"value":127},{"type":18,"tag":31,"props":608,"children":609},{},[610],{"type":18,"tag":31,"props":611,"children":612},{},[613],{"type":24,"value":614},"py_transforms模块",{"type":18,"tag":37,"props":616,"children":617},{},[618],{"type":24,"value":619},"py_transforms接口也包括两部分：mindspore.dataset.transforms.py_transforms和mindspore.dataset.vision.py_transforms。",{"type":18,"tag":37,"props":621,"children":622},{},[623],{"type":24,"value":624},"使用方法：和c_transforms模块中的使用方法类似。示例代码如下：",{"type":18,"tag":37,"props":626,"children":627},{},[628],{"type":24,"value":629},"import mindspore.dataset as ds\nimport mindspore.dataset.vision.py_transforms as py_vision\nimport mindspore.dataset.transforms.py_transforms as py_transforms",{"type":18,"tag":37,"props":631,"children":632},{},[633],{"type":24,"value":150},{"type":18,"tag":37,"props":635,"children":636},{},[637],{"type":24,"value":638},"cifar_ds = ds.Cifar10Dataset(DATA_DIR, shuffle=True, 
usage='train')",{"type":18,"tag":37,"props":640,"children":641},{},[642],{"type":24,"value":643},"transforms_list = py_transforms.Compose([",{"type":18,"tag":37,"props":645,"children":646},{},[647],{"type":24,"value":648},"py_vision.ToPIL(),",{"type":18,"tag":37,"props":650,"children":651},{},[652],{"type":24,"value":653},"py_vision.RandomCrop((32, 32), (4, 4, 4, 4)),",{"type":18,"tag":37,"props":655,"children":656},{},[657],{"type":24,"value":658},"py_vision.RandomHorizontalFlip(),",{"type":18,"tag":37,"props":660,"children":661},{},[662],{"type":24,"value":663},"py_vision.ToTensor(),",{"type":18,"tag":37,"props":665,"children":666},{},[667],{"type":24,"value":668},"py_vision.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])",{"type":18,"tag":37,"props":670,"children":671},{},[672],{"type":24,"value":586},{"type":18,"tag":37,"props":674,"children":675},{},[676],{"type":24,"value":677},"使用py_transforms自定义增强函数：",{"type":18,"tag":37,"props":679,"children":680},{},[681,683,692],{"type":24,"value":682},"自定义增强函数可参考MindSpore源码中的",{"type":18,"tag":684,"props":685,"children":689},"a",{"href":686,"rel":687},"https://gitee.com/mindspore/mindspore/blob/master/mindspore/python/mindspore/dataset/transforms/py_transforms_util.py",[688],"nofollow",[690],{"type":24,"value":691},"py_transforms_util.py",{"type":24,"value":693},"脚本。下面以RandomBrightness为例，说明自定义增强算子的定义方式：",{"type":18,"tag":37,"props":695,"children":696},{},[697],{"type":24,"value":698},"# 自定义增强函数定义\nclass RandomBrightness(object):",{"type":18,"tag":37,"props":700,"children":701},{},[702],{"type":24,"value":703},"\"\"\"",{"type":18,"tag":37,"props":705,"children":706},{},[707],{"type":24,"value":708},"Randomly adjust the brightness of the input image.",{"type":18,"tag":37,"props":710,"children":711},{},[712],{"type":24,"value":713},"Args:",{"type":18,"tag":37,"props":715,"children":716},{},[717],{"type":24,"value":718},"brightness (float): Brightness adjustment factor 
(default=0.0).",{"type":18,"tag":37,"props":720,"children":721},{},[722],{"type":24,"value":723},"Returns:",{"type":18,"tag":37,"props":725,"children":726},{},[727],{"type":24,"value":728},"numpy.ndarray, image.",{"type":18,"tag":37,"props":730,"children":731},{},[732],{"type":24,"value":703},{"type":18,"tag":37,"props":734,"children":735},{},[736],{"type":24,"value":737},"def __init__(self, brightness=0.0):",{"type":18,"tag":37,"props":739,"children":740},{},[741],{"type":24,"value":742},"self.brightness = brightness",{"type":18,"tag":37,"props":744,"children":745},{},[746],{"type":24,"value":747},"def __call__(self, img):",{"type":18,"tag":37,"props":749,"children":750},{},[751],{"type":24,"value":752},"alpha = random.uniform(-self.brightness, self.brightness)",{"type":18,"tag":37,"props":754,"children":755},{},[756],{"type":24,"value":757},"return (1-alpha) * img",{"type":18,"tag":37,"props":759,"children":760},{},[761,763,769],{"type":24,"value":762},"自定义算子的调用和",{"type":18,"tag":684,"props":764,"children":767},{"href":765,"rel":766},"https://gitee.com/mindspore/mindspore/blob/master/mindspore/python/mindspore/dataset/vision/py_transforms_util.py",[688],[768],{"type":24,"value":691},{"type":24,"value":770},"中的算子调用没有区别。",{"type":18,"tag":26,"props":772,"children":774},{"id":773},"_3数据处理",[775],{"type":18,"tag":31,"props":776,"children":777},{},[778],{"type":18,"tag":31,"props":779,"children":780},{},[781],{"type":18,"tag":31,"props":782,"children":783},{},[784],{"type":24,"value":785},"3、数据处理",{"type":18,"tag":37,"props":787,"children":788},{},[789],{"type":24,"value":790},"数据处理操作有：zip、shuffle、map、batch、repeat。",{"type":18,"tag":37,"props":792,"children":793},{},[794],{"type":18,"tag":31,"props":795,"children":796},{},[797],{"type":18,"tag":31,"props":798,"children":799},{},[800],{"type":24,"value":801},"数据处理操作",{"type":18,"tag":37,"props":803,"children":804},{},[805],{"type":18,"tag":31,"props":806,"children":807},{},[808],{"type":18,"tag":31,"props":809,"childre
n":810},{},[811],{"type":24,"value":812},"说明",{"type":18,"tag":37,"props":814,"children":815},{},[816],{"type":24,"value":817},"zip",{"type":18,"tag":37,"props":819,"children":820},{},[821],{"type":24,"value":822},"合并多个数据集",{"type":18,"tag":37,"props":824,"children":825},{},[826],{"type":24,"value":827},"shuffle",{"type":18,"tag":37,"props":829,"children":830},{},[831],{"type":24,"value":832},"混洗数据",{"type":18,"tag":37,"props":834,"children":835},{},[836],{"type":24,"value":837},"map",{"type":18,"tag":37,"props":839,"children":840},{},[841],{"type":24,"value":842},"将函数和算子作用于指定列数据",{"type":18,"tag":37,"props":844,"children":845},{},[846],{"type":24,"value":847},"batch",{"type":18,"tag":37,"props":849,"children":850},{},[851],{"type":24,"value":852},"将数据分批，每次迭代返回一个batch的数据",{"type":18,"tag":37,"props":854,"children":855},{},[856],{"type":24,"value":857},"repeat",{"type":18,"tag":37,"props":859,"children":860},{},[861],{"type":24,"value":862},"对数据集进行复制",{"type":18,"tag":37,"props":864,"children":865},{},[866],{"type":24,"value":867},"一般训练过程中都会用到shuffle、map、batch、repeat，如下示例：",{"type":18,"tag":37,"props":869,"children":870},{},[871],{"type":24,"value":537},{"type":18,"tag":37,"props":873,"children":874},{},[875],{"type":24,"value":150},{"type":18,"tag":37,"props":877,"children":878},{},[879],{"type":24,"value":638},{"type":18,"tag":37,"props":881,"children":882},{},[883],{"type":24,"value":884},"transform_list = C.Compose([",{"type":18,"tag":37,"props":886,"children":887},{},[888],{"type":24,"value":889},"CV.RandomCrop((32, 32), (4, 4, 4, 4)),",{"type":18,"tag":37,"props":891,"children":892},{},[893],{"type":24,"value":894},"CV.RandomHorizontalFlip(),",{"type":18,"tag":37,"props":896,"children":897},{},[898],{"type":24,"value":899},"CV.Rescale(rescale, shift),",{"type":18,"tag":37,"props":901,"children":902},{},[903],{"type":24,"value":904},"CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 
0.2010)),",{"type":18,"tag":37,"props":906,"children":907},{},[908,910],{"type":24,"value":909},"CV.HWC2CHW()])",{"type":18,"tag":31,"props":911,"children":912},{},[913],{"type":24,"value":914},"# map()",{"type":18,"tag":37,"props":916,"children":917},{},[918,920],{"type":24,"value":919},"cifar_ds = cifar_ds.map(input_columns=\"image\", operations=transform_list)",{"type":18,"tag":31,"props":921,"children":922},{},[923],{"type":24,"value":924},"# batch()",{"type":18,"tag":37,"props":926,"children":927},{},[928,930],{"type":24,"value":929},"cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)",{"type":18,"tag":31,"props":931,"children":932},{},[933],{"type":24,"value":934},"# repeat()",{"type":18,"tag":37,"props":936,"children":937},{},[938],{"type":24,"value":939},"cifar_ds = cifar_ds.repeat(repeat_num)",{"type":18,"tag":37,"props":941,"children":942},{},[943],{"type":24,"value":944},"在实际使用过程中，需要组合使用这几个操作时，为达到最优性能，推荐按照如下顺序： 数据集加载并shuffle -> map -> batch -> repeat。",{"type":18,"tag":37,"props":946,"children":947},{},[948],{"type":24,"value":949},"以下简单介绍一下数据处理函数的使用方法：",{"type":18,"tag":951,"props":952,"children":954},"h4",{"id":953},"_31数据集加载与shuffle",[955,973],{"type":18,"tag":31,"props":956,"children":957},{},[958,966,968],{"type":18,"tag":31,"props":959,"children":960},{},[961],{"type":18,"tag":31,"props":962,"children":963},{},[964],{"type":24,"value":965},"3.1",{"type":24,"value":967}," ",{"type":18,"tag":31,"props":969,"children":970},{},[971],{"type":24,"value":972},"数据集加载与shuffle",{"type":24,"value":967},{"type":18,"tag":37,"props":975,"children":976},{},[977],{"type":24,"value":978},"方式一：加载数据集时shuffle",{"type":18,"tag":37,"props":980,"children":981},{},[982],{"type":24,"value":145},{"type":18,"tag":37,"props":984,"children":985},{},[986],{"type":24,"value":150},{"type":18,"tag":37,"props":988,"children":989},{},[990],{"type":24,"value":638},{"type":18,"tag":37,"props":992,"children":993},{},[994],{"type":24,"value":995},"方式二：加载数据集后shuffle",{"type":18,"tag":37,"p
rops":997,"children":998},{},[999],{"type":24,"value":145},{"type":18,"tag":37,"props":1001,"children":1002},{},[1003],{"type":24,"value":150},{"type":18,"tag":37,"props":1005,"children":1006},{},[1007],{"type":24,"value":1008},"cifar_ds = ds.Cifar10Dataset(DATA_DIR, usage='train')",{"type":18,"tag":37,"props":1010,"children":1011},{},[1012],{"type":24,"value":1013},"cifar_ds = cifar_ds.shuffle(buffer_size=10000)",{"type":18,"tag":37,"props":1015,"children":1016},{},[1017],{"type":24,"value":1018},"参数说明：",{"type":18,"tag":37,"props":1020,"children":1021},{},[1022],{"type":24,"value":1023},"buffer_size:buffer_size越大，混洗程度越大，时间消耗更大",{"type":18,"tag":951,"props":1025,"children":1027},{"id":1026},"_32-map",[1028,1041],{"type":18,"tag":31,"props":1029,"children":1030},{},[1031,1039],{"type":18,"tag":31,"props":1032,"children":1033},{},[1034],{"type":18,"tag":31,"props":1035,"children":1036},{},[1037],{"type":24,"value":1038},"3.2 m",{"type":24,"value":1040},"ap",{"type":24,"value":1042},"",{"type":18,"tag":37,"props":1044,"children":1045},{},[1046],{"type":24,"value":1047},"func = lambda x : x*2",{"type":18,"tag":37,"props":1049,"children":1050},{},[1051],{"type":24,"value":1052},"cifar_ds = cifar_ds.map(input_columns=\"data\", 
operations=func)",{"type":18,"tag":37,"props":1054,"children":1055},{},[1056],{"type":24,"value":1057},"参数说明:",{"type":18,"tag":37,"props":1059,"children":1060},{},[1061],{"type":24,"value":1062},"input_columns:函数作用的列数据",{"type":18,"tag":37,"props":1064,"children":1065},{},[1066],{"type":24,"value":1067},"operations：对数据做操作的函数",{"type":18,"tag":951,"props":1069,"children":1071},{"id":1070},"_33-batch",[1072],{"type":18,"tag":31,"props":1073,"children":1074},{},[1075,1083,1084],{"type":18,"tag":31,"props":1076,"children":1077},{},[1078],{"type":18,"tag":31,"props":1079,"children":1080},{},[1081],{"type":24,"value":1082},"3.3",{"type":24,"value":127},{"type":18,"tag":31,"props":1085,"children":1086},{},[1087],{"type":18,"tag":31,"props":1088,"children":1089},{},[1090],{"type":24,"value":847},{"type":18,"tag":37,"props":1092,"children":1093},{},[1094],{"type":24,"value":1095},"cifar_ds = cifar_ds.batch(batch_size=32, drop_remainder=True, num_parallel_workers=4)",{"type":18,"tag":37,"props":1097,"children":1098},{},[1099],{"type":24,"value":1018},{"type":18,"tag":37,"props":1101,"children":1102},{},[1103],{"type":24,"value":1104},"drop_remainder:舍弃最后不完整的batch",{"type":18,"tag":37,"props":1106,"children":1107},{},[1108],{"type":24,"value":1109},"num_parallel_workers: 用几个线程来读取数据",{"type":18,"tag":951,"props":1111,"children":1113},{"id":1112},"_34-repeat",[1114],{"type":18,"tag":31,"props":1115,"children":1116},{},[1117,1125,1126],{"type":18,"tag":31,"props":1118,"children":1119},{},[1120],{"type":18,"tag":31,"props":1121,"children":1122},{},[1123],{"type":24,"value":1124},"3.4",{"type":24,"value":127},{"type":18,"tag":31,"props":1127,"children":1128},{},[1129],{"type":18,"tag":31,"props":1130,"children":1131},{},[1132],{"type":24,"value":857},{"type":18,"tag":37,"props":1134,"children":1135},{},[1136],{"type":24,"value":1137},"cifar_ds = 
cifar_ds.repeat(count=2)",{"type":18,"tag":37,"props":1139,"children":1140},{},[1141],{"type":24,"value":1018},{"type":18,"tag":37,"props":1143,"children":1144},{},[1145],{"type":24,"value":1146},"count： 数据集复制数量",{"type":18,"tag":951,"props":1148,"children":1150},{"id":1149},"_35-zip",[1151],{"type":18,"tag":31,"props":1152,"children":1153},{},[1154,1162,1163],{"type":18,"tag":31,"props":1155,"children":1156},{},[1157],{"type":18,"tag":31,"props":1158,"children":1159},{},[1160],{"type":24,"value":1161},"3.5",{"type":24,"value":127},{"type":18,"tag":31,"props":1164,"children":1165},{},[1166],{"type":18,"tag":31,"props":1167,"children":1168},{},[1169],{"type":24,"value":817},{"type":18,"tag":37,"props":1171,"children":1172},{},[1173],{"type":24,"value":145},{"type":18,"tag":37,"props":1175,"children":1176},{},[1177],{"type":24,"value":1178},"DATA_DIR_1 = \"custom_dataset_dir_1/\"",{"type":18,"tag":37,"props":1180,"children":1181},{},[1182],{"type":24,"value":1183},"DATA_DIR_2 = \"custom_dataset_dir_2/\"",{"type":18,"tag":37,"props":1185,"children":1186},{},[1187],{"type":24,"value":1188},"imagefolder_dataset_1 = ds.ImageFolderDatasetV2(DATA_DIR_1)",{"type":18,"tag":37,"props":1190,"children":1191},{},[1192],{"type":24,"value":1193},"imagefolder_dataset_2 = ds.ImageFolderDatasetV2(DATA_DIR_2)",{"type":18,"tag":37,"props":1195,"children":1196},{},[1197],{"type":24,"value":1198},"imagefolder_dataset = ds.zip((imagefolder_dataset_1, imagefolder_dataset_2))",{"title":7,"searchDepth":1200,"depth":1200,"links":1201},4,[1202,1204,1205,1214,1220],{"id":28,"depth":1203,"text":35},2,{"id":64,"depth":1203,"text":70},{"id":73,"depth":1203,"text":85,"children":1206},[1207,1210,1212],{"id":113,"depth":1208,"text":1209},3,"2.1 常用数据集加载",{"id":183,"depth":1208,"text":1211},"2.2 特定格式数据集加载",{"id":256,"depth":1208,"text":1213},"2.3 自定义数据集加载",{"id":399,"depth":1203,"text":411,"children":1215},[1216,1218],{"id":487,"depth":1208,"text":1217},"2.1 
c_transforms模块",{"id":594,"depth":1208,"text":1219},"2.2 py_transforms模块",{"id":773,"depth":1203,"text":785,"children":1221},[1222,1224,1226,1228,1230],{"id":953,"depth":1200,"text":1223},"3.1 数据集加载与shuffle",{"id":1026,"depth":1200,"text":1225},"3.2 map",{"id":1070,"depth":1200,"text":1227},"3.3 batch",{"id":1112,"depth":1200,"text":1229},"3.4 repeat",{"id":1149,"depth":1200,"text":1231},"3.5 zip","markdown","content:technology-blogs:zh:1693.md","content","technology-blogs/zh/1693.md","technology-blogs/zh/1693","md",1776506114908]