[{"data":1,"prerenderedAt":206},["ShallowReactive",2],{"content-query-qO2AzDaI3O":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":200,"_id":201,"_source":202,"_file":203,"_stem":204,"_extension":205},"/technology-blogs/zh/3215","zh",false,"","基于MindSpore案例的香橙派开发板离线推理实践——图片文本识别","文本识别指从图像中识别出文本，将图像中的文字区域转化为字符信息，通常采用CNN网络从图像中提取丰富的特征信息，然后根据提取的特征信息进行识别。","2024-06-29","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/07/12/e4dd9bc6636a43248aa97b5350ef5b7a.png","technology-blogs","调试调优",{"type":15,"children":16,"toc":187},"root",[17,25,31,37,42,111,116,121,128,136,142,150,156,164,169,174],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"基于mindspore案例的香橙派开发板离线推理实践图片文本识别",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":29},"h2",{"id":28},"样例介绍",[30],{"type":24,"value":28},{"type":18,"tag":32,"props":33,"children":34},"p",{},[35],{"type":24,"value":36},"文本识别指从图像中识别出文本，将图像中的文字区域转化为字符信息，通常采用CNN网络从图像中提取丰富的特征信息，然后根据提取的特征信息进行识别。这里采用ResNet作为特征提取网络，采用CTC(Connectionist Temporal Classification)方法进行识别。 此脚本用于将cnnctc模型ckpt文件转换成AIR文件，再转换成OM文件，最后进行离线推理。",{"type":18,"tag":26,"props":38,"children":40},{"id":39},"前期准备",[41],{"type":24,"value":39},{"type":18,"tag":43,"props":44,"children":45},"ul",{},[46,52,57,62,75,80,85,90,95,100],{"type":18,"tag":47,"props":48,"children":49},"li",{},[50],{"type":24,"value":51},"基础镜像的样例目录中已包含转换后的om模型以及测试图片，如果直接运行，可跳过此步骤。如果需要重新转换模型，可以参考下面的步骤。",{"type":18,"tag":47,"props":53,"children":54},{},[55],{"type":24,"value":56},"建议在Linux服务器或者虚拟机转换该模型。",{"type":18,"tag":47,"props":58,"children":59},{},[60],{"type":24,"value":61},"为了能进一步优化模型推理性能，我们需要将其转换为om模型进行使用 以下为转换指令：",{"type":18,"tag":47,"props":63,"children":64},{},[65],{"type":18,"tag":66,"props":67,"children":69},"pre",{"code":68},"atc --model=cnnctc.air --output=\"cnnctc\" --framework=1 --soc_version=Ascend310B4 --output_type=FP32 
--precision_mode=allow_fp32_to_fp16 --log=info\n",[70],{"type":18,"tag":71,"props":72,"children":73},"code",{"__ignoreMap":7},[74],{"type":24,"value":68},{"type":18,"tag":47,"props":76,"children":77},{},[78],{"type":24,"value":79},"其中转换参数的含义为：",{"type":18,"tag":47,"props":81,"children":82},{},[83],{"type":24,"value":84},"--model：输入模型路径",{"type":18,"tag":47,"props":86,"children":87},{},[88],{"type":24,"value":89},"--framework：原始网络模型框架类型，1表示air，5表示ONNX",{"type":18,"tag":47,"props":91,"children":92},{},[93],{"type":24,"value":94},"--output：输出模型路径",{"type":18,"tag":47,"props":96,"children":97},{},[98],{"type":24,"value":99},"--log：日志级别",{"type":18,"tag":47,"props":101,"children":102},{},[103,105,109],{"type":24,"value":104},"--soc_version：昇腾AI处理器型号",{"type":18,"tag":106,"props":107,"children":108},"br",{},[],{"type":24,"value":110},"!sh env.sh # cell中运行此代码",{"type":18,"tag":26,"props":112,"children":114},{"id":113},"模型推理实现",[115],{"type":24,"value":113},{"type":18,"tag":32,"props":117,"children":118},{},[119],{"type":24,"value":120},"得到cnnctc.om后，执行离线推理代码,加载推理图片predict.png",{"type":18,"tag":122,"props":123,"children":125},"h3",{"id":124},"_1-导入三方库",[126],{"type":24,"value":127},"1. 导入三方库",{"type":18,"tag":66,"props":129,"children":131},{"code":130},"import os\n\nimport time\nimport argparse\n\nimport matplotlib.pyplot as plt\nfrom PIL import Image\nimport numpy as np\n\nfrom acllite_model import AclLiteModel as Model\nfrom acllite_resource import AclLiteResource as AclResource\n",[132],{"type":18,"tag":71,"props":133,"children":134},{"__ignoreMap":7},[135],{"type":24,"value":130},{"type":18,"tag":122,"props":137,"children":139},{"id":138},"_2-模型导入和处理",[140],{"type":24,"value":141},"2. 
模型导入和处理",{"type":18,"tag":66,"props":143,"children":145},{"code":144},"# 获取模型om文件\nfrom download import download\nmodel_url = \"https://mindspore-courses.obs.cn-north-4.myhuaweicloud.com/orange-pi-mindspore/02-CNNCTC/cnnctc.zip\"\ndownload(model_url, \"./\", kind=\"zip\", replace=True)\n\n# om模型和图片的位置\nMODEL_PATH = './cnnctc.om'\nIMAGE_PATH = './predict.png'\n\n# 初始化acl资源\nacl_resource = AclResource()\nacl_resource.init()\n\n#导入本地om模型\nprint('load model....')\nmodel = Model(MODEL_PATH)\nprint('load model finished....')\n\n# 文本与数据编码\nclass CTCLabelConverter():\n    def __init__(self, character):\n        dict_character = list(character)\n        self.dict = {}\n        for i, char in enumerate(dict_character):\n            self.dict[char] = i + 1\n        self.character = ['[blank]'] + dict_character\n        self.dict['[blank]'] = 0\n\n    #将文本转换为数字编码\n    def encode(self, text):\n        length = [len(s) for s in text]\n        text = ''.join(text)\n        text = [self.dict[char] for char in text]\n\n        return np.array(text), np.array(length)\n\n    # 将数字编码转换为文本\n    def decode(self, text_index, length):\n        texts = []\n        index = 0\n        for l in length:\n            t = text_index[index:index + l]\n            char_list = []\n            for i in range(l):\n                if t[i] != self.dict['[blank]'] and (\n                        not (i > 0 and t[i - 1] == t[i])):\n                    char_list.append(self.character[t[i]])\n            text = ''.join(char_list)\n            texts.append(text)\n            index += l\n        return texts\n",[146],{"type":18,"tag":71,"props":147,"children":148},{"__ignoreMap":7},[149],{"type":24,"value":144},{"type":18,"tag":122,"props":151,"children":153},{"id":152},"_3-进行推理",[154],{"type":24,"value":155},"3. 
进行推理",{"type":18,"tag":66,"props":157,"children":159},{"code":158},"# 导入和处理目标图片\nimg_PIL = Image.open(IMAGE_PATH).convert('RGB')\nimg = img_PIL.resize((100, 32), resample=3)\nimg = np.array(img, dtype=np.float32)\nimg = np.expand_dims(img, axis=0) \nimg = np.transpose(img, [0, 3, 1, 2]) \n\n# 定义推理的时间\nstart = time.time()\nmodel_predict = model.execute([img])[0]\nend = time.time()\nprint(f'infer use time:{(end-start)*1000}ms')\n\n# 初始化文本编码函数\ncharacter = '0123456789abcdefghijklmnopqrstuvwxyz'\nconverter = CTCLabelConverter(character)\n\n# 推理过程\npreds_size = np.array([model_predict.shape[1]])\npreds_index = np.argmax(model_predict, 2)\npreds_index = np.reshape(preds_index, [-1])\npreds_str = converter.decode(preds_index, preds_size)\nprint('Predict: ', preds_str)\n",[160],{"type":18,"tag":71,"props":161,"children":162},{"__ignoreMap":7},[163],{"type":24,"value":158},{"type":18,"tag":26,"props":165,"children":167},{"id":166},"总结与扩展",[168],{"type":24,"value":166},{"type":18,"tag":32,"props":170,"children":171},{},[172],{"type":24,"value":173},"以上就是cnnctc文本识别样例离线推理的运行结果了，可以看到最后的验证结果，成功识别了示例图片中‘PARKING’的字样。 注意：",{"type":18,"tag":43,"props":175,"children":176},{},[177,182],{"type":18,"tag":47,"props":178,"children":179},{},[180],{"type":24,"value":181},"若出现推理失败的情况，请确保以root权限设置好环境变量（运行或参考文件夹内的env.sh文件）。",{"type":18,"tag":47,"props":183,"children":184},{},[185],{"type":24,"value":186},"再次进行推理前，请清空所有缓存。",{"title":7,"searchDepth":188,"depth":188,"links":189},4,[190,192,193,199],{"id":28,"depth":191,"text":28},2,{"id":39,"depth":191,"text":39},{"id":113,"depth":191,"text":113,"children":194},[195,197,198],{"id":124,"depth":196,"text":127},3,{"id":138,"depth":196,"text":141},{"id":152,"depth":196,"text":155},{"id":166,"depth":191,"text":166},"markdown","content:technology-blogs:zh:3215.md","content","technology-blogs/zh/3215.md","technology-blogs/zh/3215","md",1776506127414]