[{"data":1,"prerenderedAt":644},["ShallowReactive",2],{"content-query-Zwcdvfkrod":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":638,"_id":639,"_source":640,"_file":641,"_stem":642,"_extension":643},"/technology-blogs/zh/1774","zh",false,"","MindSpore易点通·精讲系列--数据处理之vision.c_transforms.Decode","探究了vision.c_transforms下的Decode算子，并结合一个数据生成和读取的案例进一步讲述如何使用该算子。","2022-08-30","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2022/09/06/171220566384487abbd51af8c91460b1.png","technology-blogs","开发者分享",{"type":15,"children":16,"toc":619},"root",[17,25,31,36,41,61,66,94,101,110,115,121,128,133,138,148,153,161,167,172,177,181,189,194,202,215,221,226,239,244,252,265,273,286,294,299,307,312,318,323,331,336,344,349,354,362,367,372,378,383,388,394,399,407,412,430,435,439,447,452,460,465,473,479,484,492,497,501,509,513,521,527,540,544,549,557,561,569,589,594,599,604],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"mindspore易点通精讲系列-数据处理之visionc_transformsdecode",[23],{"type":24,"value":8},"text",{"type":18,"tag":19,"props":26,"children":28},{"id":27},"dive-into-mindspore-visionc_transformsdecode-for-data-processing",[29],{"type":24,"value":30},"Dive Into MindSpore -- vision.c_transforms.Decode For Data Processing",{"type":18,"tag":32,"props":33,"children":34},"p",{},[35],{"type":24,"value":8},{"type":18,"tag":32,"props":37,"children":38},{},[39],{"type":24,"value":40},"本文开发环境",{"type":18,"tag":42,"props":43,"children":44},"ul",{},[45,51,56],{"type":18,"tag":46,"props":47,"children":48},"li",{},[49],{"type":24,"value":50},"Ubuntu 20.04",{"type":18,"tag":46,"props":52,"children":53},{},[54],{"type":24,"value":55},"Python 3.8",{"type":18,"tag":46,"props":57,"children":58},{},[59],{"type":24,"value":60},"MindSpore 
1.7.0",{"type":18,"tag":32,"props":62,"children":63},{},[64],{"type":24,"value":65},"本文内容摘要",{"type":18,"tag":42,"props":67,"children":68},{},[69,74,79,84,89],{"type":18,"tag":46,"props":70,"children":71},{},[72],{"type":24,"value":73},"先看官方文档",{"type":18,"tag":46,"props":75,"children":76},{},[77],{"type":24,"value":78},"再谈两种试错",{"type":18,"tag":46,"props":80,"children":81},{},[82],{"type":24,"value":83},"探究官方源码",{"type":18,"tag":46,"props":85,"children":86},{},[87],{"type":24,"value":88},"验证探究结果",{"type":18,"tag":46,"props":90,"children":91},{},[92],{"type":24,"value":93},"实战案例分析",{"type":18,"tag":95,"props":96,"children":98},"h2",{"id":97},"_1-先看官方文档",[99],{"type":24,"value":100},"1. 先看官方文档",{"type":18,"tag":32,"props":102,"children":103},{},[104],{"type":18,"tag":105,"props":106,"children":109},"img",{"alt":107,"src":108},"2022_05_01_c_transfroms_decode.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/00f/46e/1fc/08bf6aef1e00f46e1fc9c01674512545.20220829114916.17943879761321928199780741846424:50530905085336:2400:8445A6EA31B1EF0495E4D4316C237E48E259419A0D74910BBEAD5B2D3A64A735.png",[],{"type":18,"tag":32,"props":111,"children":112},{},[113],{"type":24,"value":114},"从官方文档可以获取到的有效信息只有RGB参数，而且该参数还只有一个默认值True。样例部分只知道可以用到dataset的map方法中，至于输入是什么样的，输出是什么样的，文档并没有给出明确说明。不过从样例来看，直觉上觉得输入应该是个文件，顺着这个思路，开始进行尝试。",{"type":18,"tag":95,"props":116,"children":118},{"id":117},"_2-再谈两种试错",[119],{"type":24,"value":120},"2. 
再谈两种试错",{"type":18,"tag":122,"props":123,"children":125},"h3",{"id":124},"_21-试错一",[126],{"type":24,"value":127},"2.1 试错一",{"type":18,"tag":32,"props":129,"children":130},{},[131],{"type":24,"value":132},"输入是文件名？",{"type":18,"tag":32,"props":134,"children":135},{},[136],{"type":24,"value":137},"测试代码如下：",{"type":18,"tag":139,"props":140,"children":142},"pre",{"code":141},"from mindspore.dataset.vision.c_transforms import Decode\n\nimage_file = \"/Users/kaierlong/Downloads/ms_demos/ms_gan/image/0000.jpg\"\ndecode_op = Decode()\nout = decode_op(image_file)\n\nprint(\"=== out: ===\\n{}\".format(out), flush=True)\n",[143],{"type":18,"tag":144,"props":145,"children":146},"code",{"__ignoreMap":7},[147],{"type":24,"value":141},{"type":18,"tag":32,"props":149,"children":150},{},[151],{"type":24,"value":152},"果不其然，直觉不可靠，报错如下：",{"type":18,"tag":139,"props":154,"children":156},{"code":155},"Traceback (most recent call last):\n  File \"/Users/kaierlong/test_decode.py\", line 5, in \u003Cmodule>\n    out = decode_op(image_file)\n  File \"/Users/kaierlong/Documents/PyEnv/env_ms_1.7.0/lib/python3.9/site-packages/mindspore/dataset/vision/c_transforms.py\", line 594, in __call__\n    raise TypeError(\nTypeError: Input should be an encoded image in 1-D NumPy format, got \u003Cclass 'str'>.\n",[157],{"type":18,"tag":144,"props":158,"children":159},{"__ignoreMap":7},[160],{"type":24,"value":155},{"type":18,"tag":122,"props":162,"children":164},{"id":163},"_22-试错二",[165],{"type":24,"value":166},"2.2 试错二",{"type":18,"tag":32,"props":168,"children":169},{},[170],{"type":24,"value":171},"输入是Numpy？",{"type":18,"tag":32,"props":173,"children":174},{},[175],{"type":24,"value":176},"根据试错一的错误提示，Input应该是1维numpy数据，顺着这个思路进行再次测试。",{"type":18,"tag":32,"props":178,"children":179},{},[180],{"type":24,"value":137},{"type":18,"tag":139,"props":182,"children":184},{"code":183},"import numpy as np\n\nfrom PIL import Image\nfrom mindspore.dataset.vision.c_transforms import Decode\n\nimage_file = 
\"/Users/kaierlong/Downloads/ms_demos/ms_gan/image/0000.jpg\"\nimage = np.array(Image.open(image_file)).reshape(-1)\ndecode_op = Decode()\nout = decode_op(image)\n\nprint(\"=== out: ===\\n{}\".format(out), flush=True)\n",[185],{"type":18,"tag":144,"props":186,"children":187},{"__ignoreMap":7},[188],{"type":24,"value":183},{"type":18,"tag":32,"props":190,"children":191},{},[192],{"type":24,"value":193},"报错如下：",{"type":18,"tag":139,"props":195,"children":197},{"code":196},"Traceback (most recent call last):\n  File \"/Users/kaierlong/test_decode.py\", line 9, in \u003Cmodule>\n    out = decode_op(image)\n  File \"/Users/kaierlong/Documents/PyEnv/env_ms_1.7.0/lib/python3.9/site-packages/mindspore/dataset/vision/c_transforms.py\", line 596, in __call__\n    return super().__call__(img)\n  File \"/Users/kaierlong/Documents/PyEnv/env_ms_1.7.0/lib/python3.9/site-packages/mindspore/dataset/vision/c_transforms.py\", line 72, in __call__\n    return super().__call__(*input_tensor_list)\n  File \"/Users/kaierlong/Documents/PyEnv/env_ms_1.7.0/lib/python3.9/site-packages/mindspore/dataset/transforms/c_transforms.py\", line 43, in __call__\n    output_tensor_list = callable_op(tensor_row)\nRuntimeError: Unexpected error. Decode: image decode failed.\nLine of code : 236\nFile         : /Users/jenkins/agent-working-dir/workspace/Compile_CPU_X86_MacOS_PY39/mindspore/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc\n",[198],{"type":18,"tag":144,"props":199,"children":200},{"__ignoreMap":7},[201],{"type":24,"value":196},{"type":18,"tag":32,"props":203,"children":204},{},[205,207,213],{"type":24,"value":206},"这个报错更深入，直接是",{"type":18,"tag":144,"props":208,"children":210},{"className":209},[],[211],{"type":24,"value":212},"RuntimeError: Unexpected error. Decode: image decode failed.",{"type":24,"value":214},"，看来要去研究一下官方源码。",{"type":18,"tag":95,"props":216,"children":218},{"id":217},"_3-探究官方源码",[219],{"type":24,"value":220},"3. 
探究官方源码",{"type":18,"tag":32,"props":222,"children":223},{},[224],{"type":24,"value":225},"mindspore源码分支为v1.7.0",{"type":18,"tag":32,"props":227,"children":228},{},[229,231,237],{"type":24,"value":230},"在官方文档的样例部分，是对",{"type":18,"tag":144,"props":232,"children":234},{"className":233},[],[235],{"type":24,"value":236},"image_folder_dataset",{"type":24,"value":238},"进行操作。所以决定深入该源码一探究竟。",{"type":18,"tag":32,"props":240,"children":241},{},[242],{"type":24,"value":243},"源码文件位置：",{"type":18,"tag":139,"props":245,"children":247},{"code":246},"mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.h\nmindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc\n",[248],{"type":18,"tag":144,"props":249,"children":250},{"__ignoreMap":7},[251],{"type":24,"value":246},{"type":18,"tag":32,"props":253,"children":254},{},[255,257,263],{"type":24,"value":256},"在",{"type":18,"tag":144,"props":258,"children":260},{"className":259},[],[261],{"type":24,"value":262},"image_folder_op",{"type":24,"value":264},"第91行，发现了图片处理的代码，代码如下：",{"type":18,"tag":139,"props":266,"children":268},{"code":267},"RETURN_IF_NOT_OK(Tensor::CreateFromFile(folder_path_ + (pair_ptr->first), 
&image));\n",[269],{"type":18,"tag":144,"props":270,"children":271},{"__ignoreMap":7},[272],{"type":24,"value":267},{"type":18,"tag":32,"props":274,"children":275},{},[276,278,284],{"type":24,"value":277},"该行代码用到了一个辅助方法",{"type":18,"tag":144,"props":279,"children":281},{"className":280},[],[282],{"type":24,"value":283},"CreateFromFile",{"type":24,"value":285},"，该方法定义和实现位置为：",{"type":18,"tag":139,"props":287,"children":289},{"code":288},"mindspore/ccsrc/minddata/dataset/core/tensor.h\nmindspore/ccsrc/minddata/dataset/core/tensor.cc\n",[290],{"type":18,"tag":144,"props":291,"children":292},{"__ignoreMap":7},[293],{"type":24,"value":288},{"type":18,"tag":32,"props":295,"children":296},{},[297],{"type":24,"value":298},"该方法的具体实现代码如下：",{"type":18,"tag":139,"props":300,"children":302},{"code":301},"Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr\u003CTensor> *out) {\n  RETURN_UNEXPECTED_IF_NULL(out);\n  Path file(path);\n  if (file.IsDirectory()) {\n    RETURN_STATUS_UNEXPECTED(\"Invalid file found: \" + path + \", should be file, but got directory.\");\n  }\n  std::ifstream fs;\n  fs.open(path, std::ios::binary | std::ios::in);\n  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), \"Failed to open file: \" + path);\n  int64_t num_bytes = fs.seekg(0, std::ios::end).tellg();\n  CHECK_FAIL_RETURN_UNEXPECTED(num_bytes \u003C kDeMaxDim, \"Invalid file to allocate tensor memory, check path: \" + path);\n  CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), \"Failed to find size of file, check path: \" + path);\n  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out));\n  int64_t written_bytes = fs.read(reinterpret_cast\u003Cchar *>((*out)->GetMutableBuffer()), num_bytes).gcount();\n  if (!(written_bytes == num_bytes && fs.good())) {\n    fs.close();\n    RETURN_STATUS_UNEXPECTED(\"Error in writing to tensor, check path: \" + path);\n  }\n  fs.close();\n  return 
Status::OK();\n}\n",[303],{"type":18,"tag":144,"props":304,"children":305},{"__ignoreMap":7},[306],{"type":24,"value":301},{"type":18,"tag":32,"props":308,"children":309},{},[310],{"type":24,"value":311},"从该方法分析可知，Decode的输入实际上是图片的二进制数据，下面进行验证。",{"type":18,"tag":95,"props":313,"children":315},{"id":314},"_4-验证探究结果",[316],{"type":24,"value":317},"4. 验证探究结果",{"type":18,"tag":32,"props":319,"children":320},{},[321],{"type":24,"value":322},"测试源码如下：",{"type":18,"tag":139,"props":324,"children":326},{"code":325},"=== out: ===\n[[[  0  11  17]\n  [  0   2   7]\n  [ 13  11  14]\n  [ 11   1   2]\n  [ 24   9   6]\n  [ 43  25  21]\n  [ 30  16  15]\n  [ 34  29  26]\n  [ 38  42  43]\n  [ 38  48  49]]\n\n [[  1  10  15]\n  [  0   1   4]\n  [ 27  23  24]\n  [ 34  23  21]\n  [ 26   7   3]\n  [ 27   8   2]\n  [ 29  11   7]\n  [ 43  33  31]\n  [ 39  41  38]\n  [ 40  49  48]]\n\n [[  6  10  11]\n  [  3   2   0]\n  [ 14   4   2]\n  [ 33  16   9]\n  [ 58  33  26]\n  [ 44  19  12]\n  [ 19   0   0]\n  [ 39  24  17]\n  [ 48  45  40]\n  [ 53  55  50]]\n\n [[ 15  11   8]\n  [ 39  32  26]\n  [ 42  25  18]\n  [ 94  71  63]\n  [188 158 148]\n  [162 130 119]\n  [ 69  41  30]\n  [ 54  34  23]\n  [ 91  82  75]\n  [ 99  96  89]]\n\n [[ 13   4   0]\n  [ 28  15   7]\n  [ 48  25  17]\n  [115  85  74]\n  [198 160 147]\n  [195 157 144]\n  [125  91  79]\n  [ 70  44  31]\n  [ 75  61  52]\n  [ 83  74  65]]\n\n [[ 52  39  31]\n  [ 40  23  15]\n  [ 94  67  58]\n  [160 126 114]\n  [164 125 110]\n  [179 137 123]\n  [178 140 127]\n  [ 98  68  57]\n  [ 88  70  60]\n  [ 94  81  72]]\n\n [[ 38  23  18]\n  [ 35  16   9]\n  [ 96  67  59]\n  [172 138 128]\n  [170 130 118]\n  [164 122 110]\n  [159 121 110]\n  [ 83  53  43]\n  [103  84  77]\n  [106  93  85]]\n\n [[ 31  17  14]\n  [ 30  12   8]\n  [ 28   0   0]\n  [ 95  62  53]\n  [158 120 111]\n  [152 112 104]\n  [135  98  90]\n  [110  81  75]\n  [ 99  81  77]\n  [ 98  87  83]]\n\n [[ 13   4   5]\n  [ 31  20  18]\n  [ 88  67  64]\n  [120  91  87]\n  [123  88  84]\n  
[130  93  87]\n  [159 128 125]\n  [216 192 190]\n  [180 168 168]\n  [166 160 160]]\n\n [[ 52  46  48]\n  [113 103 104]\n  [179 159 158]\n  [176 148 145]\n  [166 132 130]\n  [187 152 150]\n  [206 176 174]\n  [227 206 205]\n  [223 212 216]\n  [215 210 214]]]\n",[327],{"type":18,"tag":144,"props":328,"children":329},{"__ignoreMap":7},[330],{"type":24,"value":325},{"type":18,"tag":32,"props":332,"children":333},{},[334],{"type":24,"value":335},"输出结果：",{"type":18,"tag":139,"props":337,"children":339},{"code":338},"=== PIL out: ===\n[[[  0  11  17]\n  [  0   2   7]\n  [ 13  11  14]\n  [ 11   1   2]\n  [ 24   9   6]\n  [ 43  25  21]\n  [ 30  16  15]\n  [ 34  29  26]\n  [ 38  42  43]\n  [ 38  48  49]]\n\n [[  1  10  15]\n  [  0   1   4]\n  [ 27  23  24]\n  [ 34  23  21]\n  [ 26   7   3]\n  [ 27   8   2]\n  [ 29  11   7]\n  [ 43  33  31]\n  [ 39  41  38]\n  [ 40  49  48]]\n\n [[  6  10  11]\n  [  3   2   0]\n  [ 14   4   2]\n  [ 33  16   9]\n  [ 58  33  26]\n  [ 44  19  12]\n  [ 19   0   0]\n  [ 39  24  17]\n  [ 48  45  40]\n  [ 53  55  50]]\n\n [[ 15  11   8]\n  [ 39  32  26]\n  [ 42  25  18]\n  [ 94  71  63]\n  [188 158 148]\n  [162 130 119]\n  [ 69  41  30]\n  [ 54  34  23]\n  [ 91  82  75]\n  [ 99  96  89]]\n\n [[ 13   4   0]\n  [ 28  15   7]\n  [ 48  25  17]\n  [115  85  74]\n  [198 160 147]\n  [195 157 144]\n  [125  91  79]\n  [ 70  44  31]\n  [ 75  61  52]\n  [ 83  74  65]]\n\n [[ 52  39  31]\n  [ 40  23  15]\n  [ 94  67  58]\n  [160 126 114]\n  [164 125 110]\n  [179 137 123]\n  [178 140 127]\n  [ 98  68  57]\n  [ 88  70  60]\n  [ 94  81  72]]\n\n [[ 38  23  18]\n  [ 35  16   9]\n  [ 96  67  59]\n  [172 138 128]\n  [170 130 118]\n  [164 122 110]\n  [159 121 110]\n  [ 83  53  43]\n  [103  84  77]\n  [106  93  85]]\n\n [[ 31  17  14]\n  [ 30  12   8]\n  [ 28   0   0]\n  [ 95  62  53]\n  [158 120 111]\n  [152 112 104]\n  [135  98  90]\n  [110  81  75]\n  [ 99  81  77]\n  [ 98  87  83]]\n\n [[ 13   4   5]\n  [ 31  20  18]\n  [ 88  67  64]\n  [120  91  87]\n  [123  88  
84]\n  [130  93  87]\n  [159 128 125]\n  [216 192 190]\n  [180 168 168]\n  [166 160 160]]\n\n [[ 52  46  48]\n  [113 103 104]\n  [179 159 158]\n  [176 148 145]\n  [166 132 130]\n  [187 152 150]\n  [206 176 174]\n  [227 206 205]\n  [223 212 216]\n  [215 210 214]]]\n",[340],{"type":18,"tag":144,"props":341,"children":342},{"__ignoreMap":7},[343],{"type":24,"value":338},{"type":18,"tag":32,"props":345,"children":346},{},[347],{"type":24,"value":348},"看起来结果没什么问题，为了确认是否正确，下面再用PIL库做对比测试。",{"type":18,"tag":32,"props":350,"children":351},{},[352],{"type":24,"value":353},"对比测试代码如下：",{"type":18,"tag":139,"props":355,"children":357},{"code":356},"import numpy as np\n\nfrom PIL import Image\n\nimage_file = \"/Users/kaierlong/Downloads/ms_demos/ms_gan/image/0000.jpg\"\nimage = np.array(Image.open(image_file))\n\nprint(\"=== PIL out: ===\\n{}\".format(image), flush=True)\n",[358],{"type":18,"tag":144,"props":359,"children":360},{"__ignoreMap":7},[361],{"type":24,"value":356},{"type":18,"tag":32,"props":363,"children":364},{},[365],{"type":24,"value":366},"对比测试结果为：",{"type":18,"tag":32,"props":368,"children":369},{},[370],{"type":24,"value":371},"可以看出两者的输出是一致的，对官方源码的分析是正确的。",{"type":18,"tag":95,"props":373,"children":375},{"id":374},"_5-实战案例分析",[376],{"type":24,"value":377},"5. 
实战案例分析",{"type":18,"tag":32,"props":379,"children":380},{},[381],{"type":24,"value":382},"下面结合MindRecord来做一个数据生成及读取的实战案例。",{"type":18,"tag":32,"props":384,"children":385},{},[386],{"type":24,"value":387},"在下面案例中，笔者准备了5000张图片，读者可自行准备。",{"type":18,"tag":122,"props":389,"children":391},{"id":390},"_51-数据生成部分",[392],{"type":24,"value":393},"5.1 数据生成部分",{"type":18,"tag":32,"props":395,"children":396},{},[397],{"type":24,"value":398},"数据生成代码如下：",{"type":18,"tag":139,"props":400,"children":402},{"code":401},"def generate_dataset(data_dir, image_list_file, mindrecord_dir, num_train_shard=4, num_test_shard=2):\n    data_schema = {\n        \"image\": {\"type\": \"bytes\"},\n        \"label\": {\"type\": \"int32\"}\n    }\n\n    train_writer = FileWriter(\n        file_name=os.path.join(mindrecord_dir, \"train.mindrecord\"), shard_num=num_train_shard)\n    test_writer = FileWriter(\n        file_name=os.path.join(mindrecord_dir, \"test.mindrecord\"), shard_num=num_test_shard)\n\n    train_writer.add_schema(data_schema, \"train\")\n    test_writer.add_schema(data_schema, \"test\")\n\n    num_all_samples = 0\n    num_train_samples = 0\n    num_test_samples = 0\n\n    # 用来放置一定数量的样本数据，加速数据写入。\n    # 这里总体样本数比较少，体现不出加速效果。\n    train_tmp_samples = []\n    test_tmp_samples = []\n\n    with codecs.open(image_list_file, \"r\", \"UTF8\") as image_list_fp:\n        for line in image_list_fp:\n            line = line.strip()\n            if not line:\n                continue\n\n            # 判断图片是否存在\n            image_path = os.path.join(data_dir, line)\n            if not os.path.exists(image_path):\n                print(\"image: {} not exists!\".format(line), flush=True)\n                continue\n\n            # 读取图片数据\n            image_fp = codecs.open(image_path, \"rb\")\n            image_data = image_fp.read()\n            image_fp.close()\n            num_all_samples += 1\n            # 伪造标签数据，实际项目中会有真实的标签数据\n            label_data = num_all_samples % 10\n\n            sample 
= {\n                \"image\": image_data,\n                \"label\": label_data,\n            }\n\n            # 按照4:1比例生成训练集和测试集\n            if num_all_samples % 5 == 0:\n                test_tmp_samples.append(sample)\n                num_test_samples += 1\n                if num_test_samples % 10 == 0:\n                    test_writer.write_raw_data(test_tmp_samples)\n                    test_tmp_samples = []\n            else:\n                train_tmp_samples.append(sample)\n                num_train_samples += 1\n                if num_train_samples % 10 == 0:\n                    train_writer.write_raw_data(train_tmp_samples)\n                    train_tmp_samples = []\n\n    if train_tmp_samples:\n        train_writer.write_raw_data(train_tmp_samples)\n    if test_tmp_samples:\n        test_writer.write_raw_data(test_tmp_samples)\n                    \n    train_writer.commit()\n    test_writer.commit()\n\n    print(\"====== number of all samples: {} \".format(num_all_samples), flush=True)\n    print(\"====== number of train samples: {} \".format(num_train_samples), flush=True)\n    print(\"====== number of test samples: {} \".format(num_test_samples), 
flush=True)\n",[403],{"type":18,"tag":144,"props":404,"children":405},{"__ignoreMap":7},[406],{"type":24,"value":401},{"type":18,"tag":32,"props":408,"children":409},{},[410],{"type":24,"value":411},"其中",{"type":18,"tag":42,"props":413,"children":414},{},[415,420,425],{"type":18,"tag":46,"props":416,"children":417},{},[418],{"type":24,"value":419},"data_dir为图片数据存储目录",{"type":18,"tag":46,"props":421,"children":422},{},[423],{"type":24,"value":424},"image_list_file为图片名列表文件，每行一个图片名",{"type":18,"tag":46,"props":426,"children":427},{},[428],{"type":24,"value":429},"mindrecord_dir为MindRecord数据保存目录",{"type":18,"tag":32,"props":431,"children":432},{},[433],{"type":24,"value":434},"数据生成测试",{"type":18,"tag":32,"props":436,"children":437},{},[438],{"type":24,"value":137},{"type":18,"tag":139,"props":440,"children":442},{"code":441},"data_dir = \"/Users/kaierlong/Downloads/ms_demos/ms_gan/image/data\"\nimage_list_file = \"/Users/kaierlong/Downloads/ms_demos/ms_gan/image/list.txt\"\nmindrecord_dir = \"/Users/kaierlong/Downloads/ms_demos/ms_gan/image/mindrecord\"\ngenerate_dataset(data_dir=data_dir, image_list_file=image_list_file, mindrecord_dir=mindrecord_dir)\n",[443],{"type":18,"tag":144,"props":444,"children":445},{"__ignoreMap":7},[446],{"type":24,"value":441},{"type":18,"tag":32,"props":448,"children":449},{},[450],{"type":24,"value":451},"测试输出如下内容：",{"type":18,"tag":139,"props":453,"children":455},{"code":454},"====== number of all samples: 5000 \n====== number of train samples: 4000 \n====== number of test samples: 1000\n",[456],{"type":18,"tag":144,"props":457,"children":458},{"__ignoreMap":7},[459],{"type":24,"value":454},{"type":18,"tag":32,"props":461,"children":462},{},[463],{"type":24,"value":464},"使用tree命令查看mindrecord_dir目录，目录内容如下：",{"type":18,"tag":139,"props":466,"children":468},{"code":467},"mindrecord/\n├── test.mindrecord0\n├── test.mindrecord0.db\n├── test.mindrecord1\n├── test.mindrecord1.db\n├── train.mindrecord0\n├── train.mindrecord0.db\n├── 
train.mindrecord1\n├── train.mindrecord1.db\n├── train.mindrecord2\n├── train.mindrecord2.db\n├── train.mindrecord3\n└── train.mindrecord3.db\n\n0 directories, 12 files\n",[469],{"type":18,"tag":144,"props":470,"children":471},{"__ignoreMap":7},[472],{"type":24,"value":467},{"type":18,"tag":122,"props":474,"children":476},{"id":475},"_52-数据读取部分",[477],{"type":24,"value":478},"5.2 数据读取部分",{"type":18,"tag":32,"props":480,"children":481},{},[482],{"type":24,"value":483},"数据读取代码如下，这里用到了Decode：",{"type":18,"tag":139,"props":485,"children":487},{"code":486},"def create_dataset(mindrecord_dir, usage=\"train\", batch_size=1, num_workers=2):\n    if usage == \"train\":\n        data_file_name = \"train.mindrecord0\"\n        shuffle = True\n    else:\n        data_file_name = \"test.mindrecord0\"\n        shuffle = False\n\n    dataset_path = os.path.join(mindrecord_dir, data_file_name)\n\n    dataset = MindDataset(\n        dataset_path, columns_list=[\"image\", \"label\"],\n        num_parallel_workers=num_workers, shuffle=shuffle, num_shards=None, shard_id=None)\n\n    dataset = dataset.map(\n        operations=Decode(), input_columns=[\"image\"], output_columns=[\"image\"], num_parallel_workers=num_workers)\n\n    dataset = dataset.batch(batch_size, drop_remainder=True)\n\n    return dataset\n",[488],{"type":18,"tag":144,"props":489,"children":490},{"__ignoreMap":7},[491],{"type":24,"value":486},{"type":18,"tag":32,"props":493,"children":494},{},[495],{"type":24,"value":496},"数据读取测试",{"type":18,"tag":32,"props":498,"children":499},{},[500],{"type":24,"value":137},{"type":18,"tag":139,"props":502,"children":504},{"code":503},"mindrecord_dir = \"/Users/kaierlong/Downloads/ms_demos/ms_gan/image/mindrecord\"\ndataset = create_dataset(mindrecord_dir=mindrecord_dir, usage=\"test\")\ndata_size = dataset.get_dataset_size()\nprint(\"====== data size: {} ======\".format(data_size), flush=True)\n\nsample = None\nfor item in dataset:\n    sample = item\n    break\nprint(\"====== 
sample: ======\\n{}\".format(sample), flush=True)\n",[505],{"type":18,"tag":144,"props":506,"children":507},{"__ignoreMap":7},[508],{"type":24,"value":503},{"type":18,"tag":32,"props":510,"children":511},{},[512],{"type":24,"value":451},{"type":18,"tag":139,"props":514,"children":516},{"code":515},"====== data size: 1000 ======\n====== sample: ======\n[Tensor(shape=[1, 218, 178, 3], dtype=UInt8, value=\n[[[[152, 197, 203],\n   [150, 197, 203],\n   [150, 198, 202],\n   ...\n   [142, 202, 200],\n   [142, 202, 200],\n   [142, 202, 200]],\n  [[152, 197, 203],\n   [150, 197, 203],\n   [151, 199, 203],\n   ...\n   [142, 202, 200],\n   [142, 202, 200],\n   [142, 202, 200]],\n  [[152, 197, 203],\n   [151, 198, 204],\n   [151, 199, 203],\n   ...\n   [141, 203, 202],\n   [141, 203, 202],\n   [141, 203, 202]],\n  ...\n  [[146, 200, 202],\n   [146, 200, 202],\n   [146, 200, 202],\n   ...\n   [145, 199, 201],\n   [141, 192, 195],\n   [141, 192, 195]],\n  [[145, 199, 201],\n   [146, 200, 202],\n   [146, 200, 202],\n   ...\n   [145, 199, 201],\n   [146, 197, 200],\n   [146, 197, 200]],\n  [[145, 199, 201],\n   [146, 200, 202],\n   [146, 200, 202],\n   ...\n   [145, 199, 201],\n   [148, 199, 202],\n   [148, 199, 202]]]]), Tensor(shape=[1], dtype=Int32, value= [5])]\n",[517],{"type":18,"tag":144,"props":518,"children":519},{"__ignoreMap":7},[520],{"type":24,"value":515},{"type":18,"tag":122,"props":522,"children":524},{"id":523},"_53-验证读取结果",[525],{"type":24,"value":526},"5.3 
验证读取结果",{"type":18,"tag":32,"props":528,"children":529},{},[530,532,538],{"type":24,"value":531},"找到",{"type":18,"tag":144,"props":533,"children":535},{"className":534},[],[536],{"type":24,"value":537},"5.2",{"type":24,"value":539},"中读取的图片（如果读者顺序一致的话，应该是第五张图片），用PIL进行读取测试。",{"type":18,"tag":32,"props":541,"children":542},{},[543],{"type":24,"value":137},{"type":18,"tag":32,"props":545,"children":546},{},[547],{"type":24,"value":548},"注意替换代码中image_file为读者真实路径",{"type":18,"tag":139,"props":550,"children":552},{"code":551},"import numpy as np\nfrom PIL import Image\n\nimage_file = \"/Users/kaierlong/Downloads/ms_demos/ms_gan/image/data/000005.jpg\"\nimage = Image.open(image_file)\nimage_data = np.asarray(image)\nprint(\"====== PIL output: ======\\n{}\".format(image_data), flush=True)\n",[553],{"type":18,"tag":144,"props":554,"children":555},{"__ignoreMap":7},[556],{"type":24,"value":551},{"type":18,"tag":32,"props":558,"children":559},{},[560],{"type":24,"value":451},{"type":18,"tag":139,"props":562,"children":564},{"code":563},"====== PIL output: ======\n[[[152 197 203]\n  [150 197 203]\n  [150 198 202]\n  ...\n  [142 202 200]\n  [142 202 200]\n  [142 202 200]]\n\n [[152 197 203]\n  [150 197 203]\n  [151 199 203]\n  ...\n  [142 202 200]\n  [142 202 200]\n  [142 202 200]]\n\n [[152 197 203]\n  [151 198 204]\n  [151 199 203]\n  ...\n  [141 203 202]\n  [141 203 202]\n  [141 203 202]]\n\n ...\n\n [[146 200 202]\n  [146 200 202]\n  [146 200 202]\n  ...\n  [145 199 201]\n  [141 192 195]\n  [141 192 195]]\n\n [[145 199 201]\n  [146 200 202]\n  [146 200 202]\n  ...\n  [145 199 201]\n  [146 197 200]\n  [146 197 200]]\n\n [[145 199 201]\n  [146 200 202]\n  [146 200 202]\n  ...\n  [145 199 201]\n  [148 199 202]\n  [148 199 
202]]]\n",[565],{"type":18,"tag":144,"props":566,"children":567},{"__ignoreMap":7},[568],{"type":24,"value":563},{"type":18,"tag":32,"props":570,"children":571},{},[572,574,579,581,587],{"type":24,"value":573},"可以看到",{"type":18,"tag":144,"props":575,"children":577},{"className":576},[],[578],{"type":24,"value":537},{"type":24,"value":580},"和",{"type":18,"tag":144,"props":582,"children":584},{"className":583},[],[585],{"type":24,"value":586},"5.3",{"type":24,"value":588},"的读取结果是一致的。",{"type":18,"tag":95,"props":590,"children":592},{"id":591},"本文总结",[593],{"type":24,"value":591},{"type":18,"tag":32,"props":595,"children":596},{},[597],{"type":24,"value":598},"本文探究了vision.c_transforms下的Decode算子，并结合一个数据生成和读取的案例进一步讲述如何使用该算子。",{"type":18,"tag":95,"props":600,"children":602},{"id":601},"本文参考",[603],{"type":24,"value":601},{"type":18,"tag":42,"props":605,"children":606},{},[607],{"type":18,"tag":46,"props":608,"children":609},{},[610],{"type":18,"tag":611,"props":612,"children":616},"a",{"href":613,"rel":614},"https://www.mindspore.cn/docs/zh-CN/r1.6/api_python/dataset_vision/mindspore.dataset.vision.c_transforms.Decode.html",[615],"nofollow",[617],{"type":24,"value":618},"Decode文档说明",{"title":7,"searchDepth":620,"depth":620,"links":621},4,[622,624,629,630,631,636,637],{"id":97,"depth":623,"text":100},2,{"id":117,"depth":623,"text":120,"children":625},[626,628],{"id":124,"depth":627,"text":127},3,{"id":163,"depth":627,"text":166},{"id":217,"depth":623,"text":220},{"id":314,"depth":623,"text":317},{"id":374,"depth":623,"text":377,"children":632},[633,634,635],{"id":390,"depth":627,"text":393},{"id":475,"depth":627,"text":478},{"id":523,"depth":627,"text":526},{"id":591,"depth":623,"text":591},{"id":601,"depth":623,"text":601},"markdown","content:technology-blogs:zh:1774.md","content","technology-blogs/zh/1774.md","technology-blogs/zh/1774","md",1776506115742]